diff --git a/.travis.yml b/.travis.yml index a6409c50f87ee..3934ac1d99e32 100644 --- a/.travis.yml +++ b/.travis.yml @@ -55,8 +55,9 @@ notifications: # 5. Run maven build before running lints. install: - export MAVEN_SKIP_RC=1 - - build/mvn ${PHASE} ${PROFILES} ${MODULES} ${ARGS} -# 6. Run lints. + - build/mvn -T 4 -q -DskipTests -Pkubernetes -Pmesos -Pyarn -Pkinesis-asl -Phive -Phive-thriftserver install + +# 6. Run lint-java. script: - dev/lint-java - dev/lint-scala diff --git a/NOTICE b/NOTICE index f4b64b5c3f470..6ec240efbf12e 100644 --- a/NOTICE +++ b/NOTICE @@ -448,6 +448,12 @@ Copyright (C) 2011 Google Inc. Apache Commons Pool Copyright 1999-2009 The Apache Software Foundation +This product includes/uses Kubernetes & OpenShift 3 Java Client (https://github.com/fabric8io/kubernetes-client) +Copyright (C) 2015 Red Hat, Inc. + +This product includes/uses OkHttp (https://github.com/square/okhttp) +Copyright (C) 2012 The Android Open Source Project + ========================================================================= == NOTICE file corresponding to section 4(d) of the Apache License, == == Version 2.0, in this case for the DataNucleus distribution. == diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index c708398052220..159fca61e0638 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 2.3.0 +Version: 2.4.0 Title: R Frontend for Apache Spark Description: Provides an R Frontend for Apache Spark. Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), @@ -59,3 +59,4 @@ Collate: 'window.R' RoxygenNote: 6.0.1 VignetteBuilder: knitr +NeedsCompilation: no diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 57838f52eac3f..c51eb0f39c4b1 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -76,7 +76,9 @@ exportMethods("glm", export("setJobGroup", "clearJobGroup", "cancelJobGroup", - "setJobDescription") + "setJobDescription", + "setLocalProperty", + "getLocalProperty") # Export Utility methods export("setLogLevel") @@ -133,6 +135,7 @@ exportMethods("arrange", "isStreaming", "join", "limit", + "localCheckpoint", "merge", "mutate", "na.omit", @@ -176,6 +179,7 @@ exportMethods("arrange", "with", "withColumn", "withColumnRenamed", + "withWatermark", "write.df", "write.jdbc", "write.json", @@ -225,11 +229,14 @@ exportMethods("%<=>%", "crc32", "create_array", "create_map", + "current_date", + "current_timestamp", "hash", "cume_dist", "date_add", "date_format", "date_sub", + "date_trunc", "datediff", "dayofmonth", "dayofweek", diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index b8d732a485862..6caa125e1e14a 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2297,6 +2297,7 @@ setClassUnion("characterOrColumn", c("character", "Column")) #' @param ... additional sorting fields #' @param decreasing a logical argument indicating sorting order for columns when #' a character vector is specified for col +#' @param withinPartitions a logical argument indicating whether to sort only within each partition #' @return A SparkDataFrame where all elements are sorted. 
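+#' @details With \code{withinPartitions = TRUE}, rows are sorted only inside each partition and no
+#' global ordering (hence no shuffle) is imposed. A minimal sketch, reusing the \code{cars}
+#' dataset from base R:
+#' \preformatted{
+#'   df <- createDataFrame(cars, numPartitions = 10)
+#'   head(arrange(df, "dist", withinPartitions = TRUE))
+#' }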
#' @family SparkDataFrame functions #' @aliases arrange,SparkDataFrame,Column-method @@ -2312,16 +2313,21 @@ setClassUnion("characterOrColumn", c("character", "Column")) #' arrange(df, asc(df$col1), desc(abs(df$col2))) #' arrange(df, "col1", decreasing = TRUE) #' arrange(df, "col1", "col2", decreasing = c(TRUE, FALSE)) +#' arrange(df, "col1", "col2", withinPartitions = TRUE) #' } #' @note arrange(SparkDataFrame, Column) since 1.4.0 setMethod("arrange", signature(x = "SparkDataFrame", col = "Column"), - function(x, col, ...) { + function(x, col, ..., withinPartitions = FALSE) { jcols <- lapply(list(col, ...), function(c) { c@jc }) - sdf <- callJMethod(x@sdf, "sort", jcols) + if (withinPartitions) { + sdf <- callJMethod(x@sdf, "sortWithinPartitions", jcols) + } else { + sdf <- callJMethod(x@sdf, "sort", jcols) + } dataFrame(sdf) }) @@ -2332,7 +2338,7 @@ setMethod("arrange", #' @note arrange(SparkDataFrame, character) since 1.4.0 setMethod("arrange", signature(x = "SparkDataFrame", col = "character"), - function(x, col, ..., decreasing = FALSE) { + function(x, col, ..., decreasing = FALSE, withinPartitions = FALSE) { # all sorting columns by <- list(col, ...) @@ -2356,7 +2362,7 @@ setMethod("arrange", } }) - do.call("arrange", c(x, jcols)) + do.call("arrange", c(x, jcols, withinPartitions = withinPartitions)) }) #' @rdname arrange @@ -3048,10 +3054,10 @@ setMethod("describe", #' \item stddev #' \item min #' \item max -#' \item arbitrary approximate percentiles specified as a percentage (eg, "75%") +#' \item arbitrary approximate percentiles specified as a percentage (e.g., "75\%") #' } #' If no statistics are given, this function computes count, mean, stddev, min, -#' approximate quartiles (percentiles at 25%, 50%, and 75%), and max. +#' approximate quartiles (percentiles at 25\%, 50\%, and 75\%), and max. #' This function is meant for exploratory data analysis, as we make no guarantee about the #' backward compatibility of the schema of the resulting Dataset. If you want to #' programmatically compute summary statistics, use the \code{agg} function instead. @@ -3655,7 +3661,8 @@ setMethod("getNumPartitions", #' isStreaming #' #' Returns TRUE if this SparkDataFrame contains one or more sources that continuously return data -#' as it arrives. +#' as it arrives. A dataset that reads data from a streaming source must be executed as a +#' \code{StreamingQuery} using \code{write.stream}. #' #' @param x A SparkDataFrame #' @return TRUE if this SparkDataFrame is from a streaming source @@ -3701,7 +3708,17 @@ setMethod("isStreaming", #' @param df a streaming SparkDataFrame. #' @param source a name for external data source. #' @param outputMode one of 'append', 'complete', 'update'. -#' @param ... additional argument(s) passed to the method. +#' @param partitionBy a name or a list of names of columns to partition the output by on the file +#' system. If specified, the output is laid out on the file system similar to Hive's +#' partitioning scheme. +#' @param trigger.processingTime a processing time interval as a string, e.g. '5 seconds', +#' '1 minute'. This is a trigger that runs a query periodically based on the processing +#' time. If the value is '0 seconds', the query will run as fast as possible; this is the +#' default. Only one trigger can be set. +#' @param trigger.once a logical, must be set to \code{TRUE}. This is a trigger that processes only +#' one batch of data in a streaming query and then terminates the query. Only one trigger +#' can be set. +#' @param ...
additional external data source specific named options. #' #' @family SparkDataFrame functions #' @seealso \link{read.stream} @@ -3719,7 +3736,8 @@ setMethod("isStreaming", #' # console #' q <- write.stream(wordCounts, "console", outputMode = "complete") #' # text stream -#' q <- write.stream(df, "text", path = "/home/user/out", checkpointLocation = "/home/user/cp") +#' q <- write.stream(df, "text", path = "/home/user/out", checkpointLocation = "/home/user/cp", +#' partitionBy = c("year", "month"), trigger.processingTime = "30 seconds") #' # memory stream #' q <- write.stream(wordCounts, "memory", queryName = "outs", outputMode = "complete") #' head(sql("SELECT * from outs")) @@ -3731,7 +3749,8 @@ setMethod("isStreaming", #' @note experimental setMethod("write.stream", signature(df = "SparkDataFrame"), - function(df, source = NULL, outputMode = NULL, ...) { + function(df, source = NULL, outputMode = NULL, partitionBy = NULL, + trigger.processingTime = NULL, trigger.once = NULL, ...) { if (!is.null(source) && !is.character(source)) { stop("source should be character, NULL or omitted. It is the data source specified ", "in 'spark.sql.sources.default' configuration by default.") } @@ -3742,12 +3761,43 @@ setMethod("write.stream", if (is.null(source)) { source <- getDefaultSqlSource() } + cols <- NULL + if (!is.null(partitionBy)) { + if (!all(sapply(partitionBy, function(c) { is.character(c) }))) { + stop("All partitionBy column names should be characters.") + } + cols <- as.list(partitionBy) + } + jtrigger <- NULL + if (!is.null(trigger.processingTime) && !is.na(trigger.processingTime)) { + if (!is.null(trigger.once)) { + stop("Multiple triggers not allowed.") + } + interval <- as.character(trigger.processingTime) + if (nchar(interval) == 0) { + stop("Value for trigger.processingTime must be a non-empty string.") + } + jtrigger <- handledCallJStatic("org.apache.spark.sql.streaming.Trigger", + "ProcessingTime", + interval) + } else if (!is.null(trigger.once) && !is.na(trigger.once)) { + if (!is.logical(trigger.once) || !trigger.once) { + stop("Value for trigger.once must be TRUE.") + } + jtrigger <- callJStatic("org.apache.spark.sql.streaming.Trigger", "Once") + } options <- varargsToStrEnv(...) write <- handledCallJMethod(df@sdf, "writeStream") write <- callJMethod(write, "format", source) if (!is.null(outputMode)) { write <- callJMethod(write, "outputMode", outputMode) } + if (!is.null(cols)) { + write <- callJMethod(write, "partitionBy", cols) + } + if (!is.null(jtrigger)) { + write <- callJMethod(write, "trigger", jtrigger) + } write <- callJMethod(write, "options", options) ssq <- handledCallJMethod(write, "start") streamingQuery(ssq) @@ -3782,6 +3832,33 @@ setMethod("checkpoint", dataFrame(df) }) +#' localCheckpoint +#' +#' Returns a locally checkpointed version of this SparkDataFrame. Checkpointing can be used to +#' truncate the logical plan, which is especially useful in iterative algorithms where the plan +#' may grow exponentially. Local checkpoints are stored in the executors using the caching +#' subsystem and therefore they are not reliable.
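+#' A minimal sketch of the intended iterative use, assuming an existing SparkDataFrame \code{df}
+#' with a numeric column \code{value}:
+#' \preformatted{
+#'   for (i in 1:10) {
+#'     df <- withColumn(df, "value", df$value * 2)
+#'     df <- localCheckpoint(df)  # truncate the growing lineage on each pass
+#'   }
+#' }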
+#' +#' @param x A SparkDataFrame +#' @param eager whether to locally checkpoint this SparkDataFrame immediately +#' @return a new locally checkpointed SparkDataFrame +#' @family SparkDataFrame functions +#' @aliases localCheckpoint,SparkDataFrame-method +#' @rdname localCheckpoint +#' @name localCheckpoint +#' @export +#' @examples +#'\dontrun{ +#' df <- localCheckpoint(df) +#' } +#' @note localCheckpoint since 2.3.0 +setMethod("localCheckpoint", + signature(x = "SparkDataFrame"), + function(x, eager = TRUE) { + df <- callJMethod(x@sdf, "localCheckpoint", as.logical(eager)) + dataFrame(df) + }) + #' cube #' #' Create a multi-dimensional cube for the SparkDataFrame using the specified columns. @@ -3934,3 +4011,47 @@ setMethod("broadcast", sdf <- callJStatic("org.apache.spark.sql.functions", "broadcast", x@sdf) dataFrame(sdf) }) + +#' withWatermark +#' +#' Defines an event time watermark for this streaming SparkDataFrame. A watermark tracks a point in +#' time before which we assume no more late data is going to arrive. +#' +#' Spark will use this watermark for several purposes: +#' \itemize{ +#' \item To know when a given time window aggregation can be finalized and thus can be emitted +#' when using output modes that do not allow updates. +#' \item To minimize the amount of state that we need to keep for ongoing aggregations. +#' } +#' The current watermark is computed by looking at the \code{MAX(eventTime)} seen across +#' all of the partitions in the query minus a user specified \code{delayThreshold}. Due to the cost +#' of coordinating this value across partitions, the actual watermark used is only guaranteed +#' to be at least \code{delayThreshold} behind the actual event time. In some cases we may still +#' process records that arrive more than \code{delayThreshold} late. +#' +#' @param x a streaming SparkDataFrame +#' @param eventTime a string specifying the name of the Column that contains the event time of the +#' row. +#' @param delayThreshold a string specifying the minimum delay to wait for data to arrive late, +#' relative to the latest record that has been processed in the form of an +#' interval (e.g. "1 minute" or "5 hours"). NOTE: This should not be negative. +#' @return a SparkDataFrame. +#' @aliases withWatermark,SparkDataFrame,character,character-method +#' @family SparkDataFrame functions +#' @rdname withWatermark +#' @name withWatermark +#' @export +#' @examples +#' \dontrun{ +#' sparkR.session() +#' schema <- structType(structField("time", "timestamp"), structField("value", "double")) +#' df <- read.stream("json", path = jsonDir, schema = schema, maxFilesPerTrigger = 1) +#' df <- withWatermark(df, "time", "10 minutes") +#' } +#' @note withWatermark since 2.3.0 +setMethod("withWatermark", + signature(x = "SparkDataFrame", eventTime = "character", delayThreshold = "character"), + function(x, eventTime, delayThreshold) { + sdf <- callJMethod(x@sdf, "withWatermark", eventTime, delayThreshold) + dataFrame(sdf) + }) diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 3b7f71bbbffb8..9d0a2d5e074e4 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -727,7 +727,9 @@ read.jdbc <- function(url, tableName, #' @param schema The data schema defined in structType or a DDL-formatted string, this is #' required for file-based streaming data source #' @param ... additional external data source specific named options, for instance \code{path} for -#' file-based streaming data source +#' file-based streaming data source.
\code{timeZone} to indicate a timezone to be used to +#' parse timestamps in the JSON/CSV data sources or partition values; if it isn't set, it +#' uses the default value, the session local timezone. #' @return SparkDataFrame #' @rdname read.stream #' @name read.stream diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 237ef061e8071..55365a41d774b 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -39,11 +39,19 @@ NULL #' Date time functions defined for \code{Column}. #' #' @param x Column to compute on. In \code{window}, it must be a time Column of -#' \code{TimestampType}. -#' @param format For \code{to_date} and \code{to_timestamp}, it is the string to use to parse -#' Column \code{x} to DateType or TimestampType. For \code{trunc}, it is the string -#' to use to specify the truncation method. For example, "year", "yyyy", "yy" for -#' truncate by year, or "month", "mon", "mm" for truncate by month. +#' \code{TimestampType}. This is not used with \code{current_date} and +#' \code{current_timestamp}. +#' @param format The format for the given dates or timestamps in Column \code{x}. See the +#' format used in the following methods: +#' \itemize{ +#' \item \code{to_date} and \code{to_timestamp}: it is the string to use to parse +#' Column \code{x} to DateType or TimestampType. +#' \item \code{trunc}: it is the string to use to specify the truncation method. +#' For example, "year", "yyyy", "yy" for truncate by year, or "month", "mon", +#' "mm" for truncate by month. +#' \item \code{date_trunc}: it is similar to \code{trunc}'s format, but additionally +#' supports "day", "dd", "second", "minute", "hour", "week" and "quarter". +#' } #' @param ... additional argument(s). #' @name column_datetime_functions #' @rdname column_datetime_functions @@ -1102,10 +1110,11 @@ setMethod("lower", }) #' @details -#' \code{ltrim}: Trims the spaces from left end for the specified string value. +#' \code{ltrim}: Trims the spaces from the left end of the specified string value. Optionally a +#' \code{trimString} can be specified. #' #' @rdname column_string_functions -#' @aliases ltrim ltrim,Column-method +#' @aliases ltrim ltrim,Column,missing-method #' @export #' @examples #' #' @@ -1121,12 +1130,24 @@ setMethod("lower", #' head(tmp)} #' @note ltrim since 1.5.0 setMethod("ltrim", - signature(x = "Column"), - function(x) { + signature(x = "Column", trimString = "missing"), + function(x, trimString) { jc <- callJStatic("org.apache.spark.sql.functions", "ltrim", x@jc) column(jc) }) +#' @param trimString a character string to trim with +#' @rdname column_string_functions +#' @aliases ltrim,Column,character-method +#' @export +#' @note ltrim(Column, character) since 2.3.0 +setMethod("ltrim", + signature(x = "Column", trimString = "character"), + function(x, trimString) { + jc <- callJStatic("org.apache.spark.sql.functions", "ltrim", x@jc, trimString) + column(jc) + }) + #' @details #' \code{max}: Returns the maximum value of the expression in a group. #' @@ -1341,19 +1362,31 @@ setMethod("bround", }) #' @details -#' \code{rtrim}: Trims the spaces from right end for the specified string value. +#' \code{rtrim}: Trims the spaces from the right end of the specified string value. Optionally a +#' \code{trimString} can be specified.
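+#' For example, a small sketch assuming a SparkDataFrame \code{df} with a string column
+#' \code{value}:
+#' \preformatted{
+#'   # trim trailing spaces vs. trailing "~" characters
+#'   head(select(df, rtrim(df$value), rtrim(df$value, "~")))
+#' }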
#' #' @rdname column_string_functions -#' @aliases rtrim rtrim,Column-method +#' @aliases rtrim rtrim,Column,missing-method #' @export #' @note rtrim since 1.5.0 setMethod("rtrim", - signature(x = "Column"), - function(x) { + signature(x = "Column", trimString = "missing"), + function(x, trimString) { jc <- callJStatic("org.apache.spark.sql.functions", "rtrim", x@jc) column(jc) }) +#' @rdname column_string_functions +#' @aliases rtrim,Column,character-method +#' @export +#' @note rtrim(Column, character) since 2.3.0 +setMethod("rtrim", + signature(x = "Column", trimString = "character"), + function(x, trimString) { + jc <- callJStatic("org.apache.spark.sql.functions", "rtrim", x@jc, trimString) + column(jc) + }) + #' @details #' \code{sd}: Alias for \code{stddev_samp}. #' @@ -1782,19 +1815,31 @@ setMethod("to_timestamp", }) #' @details -#' \code{trim}: Trims the spaces from both ends for the specified string column. +#' \code{trim}: Trims the spaces from both ends of the specified string column. Optionally a +#' \code{trimString} can be specified. #' #' @rdname column_string_functions -#' @aliases trim trim,Column-method +#' @aliases trim trim,Column,missing-method #' @export #' @note trim since 1.5.0 setMethod("trim", - signature(x = "Column"), - function(x) { + signature(x = "Column", trimString = "missing"), + function(x, trimString) { jc <- callJStatic("org.apache.spark.sql.functions", "trim", x@jc) column(jc) }) +#' @rdname column_string_functions +#' @aliases trim,Column,character-method +#' @export +#' @note trim(Column, character) since 2.3.0 +setMethod("trim", + signature(x = "Column", trimString = "character"), + function(x, trimString) { + jc <- callJStatic("org.apache.spark.sql.functions", "trim", x@jc, trimString) + column(jc) + }) + #' @details #' \code{unbase64}: Decodes a BASE64 encoded string column and returns it as a binary column. #' This is the reverse of base64. @@ -2088,7 +2133,8 @@ setMethod("countDistinct", }) #' @details -#' \code{concat}: Concatenates multiple input string columns together into a single string column. +#' \code{concat}: Concatenates multiple input columns together into a single column. +#' If all inputs are binary, concat returns an output as binary. Otherwise, it returns a string. #' #' @rdname column_string_functions #' @aliases concat concat,Column-method @@ -2770,11 +2816,11 @@ setMethod("rpad", signature(x = "Column", len = "numeric", pad = "character"), }) #' @details -#' \code{substring_index}: Returns the substring from string str before count occurrences of -#' the delimiter delim. If count is positive, everything the left of the final delimiter -#' (counting from left) is returned. If count is negative, every to the right of the final -#' delimiter (counting from the right) is returned. substring_index performs a case-sensitive -#' match when searching for delim. +#' \code{substring_index}: Returns the substring from string (\code{x}) before \code{count} +#' occurrences of the delimiter (\code{delim}). If \code{count} is positive, everything to the +#' left of the final delimiter (counting from the left) is returned. If \code{count} is negative, +#' everything to the right of the final delimiter (counting from the right) is returned. +#' \code{substring_index} performs a case-sensitive match when searching for the delimiter. #' #' @param delim a delimiter string. #' @param count number of occurrences of \code{delim} before the substring is returned.
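+#' For instance, if a column \code{x} holds the string "a.b.c" (a sketch of the semantics):
+#' \preformatted{
+#'   substring_index(x, ".", 2)   # "a.b" -- everything before the second "." from the left
+#'   substring_index(x, ".", -1)  # "c"   -- everything after the last "."
+#' }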
@@ -3478,3 +3524,53 @@ setMethod("trunc", x@jc, as.character(format)) column(jc) }) + +#' @details +#' \code{date_trunc}: Returns timestamp truncated to the unit specified by the format. +#' +#' @rdname column_datetime_functions +#' @aliases date_trunc date_trunc,character,Column-method +#' @export +#' @examples +#' +#' \dontrun{ +#' head(select(df, df$time, date_trunc("hour", df$time), date_trunc("minute", df$time), +#' date_trunc("week", df$time), date_trunc("quarter", df$time)))} +#' @note date_trunc since 2.3.0 +setMethod("date_trunc", + signature(format = "character", x = "Column"), + function(format, x) { + jc <- callJStatic("org.apache.spark.sql.functions", "date_trunc", format, x@jc) + column(jc) + }) + +#' @details +#' \code{current_date}: Returns the current date as a date column. +#' +#' @rdname column_datetime_functions +#' @aliases current_date current_date,missing-method +#' @export +#' @examples +#' \dontrun{ +#' head(select(df, current_date(), current_timestamp()))} +#' @note current_date since 2.3.0 +setMethod("current_date", + signature("missing"), + function() { + jc <- callJStatic("org.apache.spark.sql.functions", "current_date") + column(jc) + }) + +#' @details +#' \code{current_timestamp}: Returns the current timestamp as a timestamp column. +#' +#' @rdname column_datetime_functions +#' @aliases current_timestamp current_timestamp,missing-method +#' @export +#' @note current_timestamp since 2.3.0 +setMethod("current_timestamp", + signature("missing"), + function() { + jc <- callJStatic("org.apache.spark.sql.functions", "current_timestamp") + column(jc) + }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 8fcf269087c7d..e0dde3339fabc 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -611,6 +611,10 @@ setGeneric("isStreaming", function(x) { standardGeneric("isStreaming") }) #' @export setGeneric("limit", function(x, num) {standardGeneric("limit") }) +#' @rdname localCheckpoint +#' @export +setGeneric("localCheckpoint", function(x, eager = TRUE) { standardGeneric("localCheckpoint") }) + #' @rdname merge #' @export setGeneric("merge") @@ -795,6 +799,12 @@ setGeneric("withColumn", function(x, colName, col) { standardGeneric("withColumn setGeneric("withColumnRenamed", function(x, existingCol, newCol) { standardGeneric("withColumnRenamed") }) +#' @rdname withWatermark +#' @export +setGeneric("withWatermark", function(x, eventTime, delayThreshold) { + standardGeneric("withWatermark") +}) + #' @rdname write.df #' @export setGeneric("write.df", function(df, path = NULL, ...) { standardGeneric("write.df") }) @@ -1023,6 +1033,17 @@ setGeneric("hash", function(x, ...) 
{ standardGeneric("hash") }) #' @name NULL setGeneric("cume_dist", function(x = "missing") { standardGeneric("cume_dist") }) +#' @rdname column_datetime_functions +#' @export +#' @name NULL +setGeneric("current_date", function(x = "missing") { standardGeneric("current_date") }) + +#' @rdname column_datetime_functions +#' @export +#' @name NULL +setGeneric("current_timestamp", function(x = "missing") { standardGeneric("current_timestamp") }) + + #' @rdname column_datetime_diff_functions #' @export #' @name NULL @@ -1043,6 +1064,11 @@ setGeneric("date_format", function(y, x) { standardGeneric("date_format") }) #' @name NULL setGeneric("date_sub", function(y, x) { standardGeneric("date_sub") }) +#' @rdname column_datetime_functions +#' @export +#' @name NULL +setGeneric("date_trunc", function(format, x) { standardGeneric("date_trunc") }) + #' @rdname column_datetime_functions #' @export #' @name NULL @@ -1221,7 +1247,7 @@ setGeneric("lpad", function(x, len, pad) { standardGeneric("lpad") }) #' @rdname column_string_functions #' @export #' @name NULL -setGeneric("ltrim", function(x) { standardGeneric("ltrim") }) +setGeneric("ltrim", function(x, trimString) { standardGeneric("ltrim") }) #' @rdname column_collection_functions #' @export @@ -1371,7 +1397,7 @@ setGeneric("rpad", function(x, len, pad) { standardGeneric("rpad") }) #' @rdname column_string_functions #' @export #' @name NULL -setGeneric("rtrim", function(x) { standardGeneric("rtrim") }) +setGeneric("rtrim", function(x, trimString) { standardGeneric("rtrim") }) #' @rdname column_aggregate_functions #' @export @@ -1511,7 +1537,7 @@ setGeneric("translate", function(x, matchingString, replaceString) { standardGen #' @rdname column_string_functions #' @export #' @name NULL -setGeneric("trim", function(x) { standardGeneric("trim") }) +setGeneric("trim", function(x, trimString) { standardGeneric("trim") }) #' @rdname column_string_functions #' @export diff --git a/R/pkg/R/mllib_recommendation.R b/R/pkg/R/mllib_recommendation.R index fa794249085d7..5441c4a4022a9 100644 --- a/R/pkg/R/mllib_recommendation.R +++ b/R/pkg/R/mllib_recommendation.R @@ -48,6 +48,8 @@ setClass("ALSModel", representation(jobj = "jobj")) #' @param numUserBlocks number of user blocks used to parallelize computation (> 0). #' @param numItemBlocks number of item blocks used to parallelize computation (> 0). #' @param checkpointInterval number of checkpoint intervals (>= 1) or disable checkpoint (-1). +#' Note: this setting will be ignored if the checkpoint directory is not +#' set. #' @param ... additional argument(s) passed to the method. #' @return \code{spark.als} returns a fitted ALS model. #' @rdname spark.als diff --git a/R/pkg/R/mllib_tree.R b/R/pkg/R/mllib_tree.R index 89a58bf0aadae..4e5ddf22ee16d 100644 --- a/R/pkg/R/mllib_tree.R +++ b/R/pkg/R/mllib_tree.R @@ -161,6 +161,8 @@ print.summary.decisionTree <- function(x) { #' >= 1. #' @param minInfoGain Minimum information gain for a split to be considered at a tree node. #' @param checkpointInterval Param for set checkpoint interval (>= 1) or disable checkpoint (-1). +#' Note: this setting will be ignored if the checkpoint directory is not +#' set. #' @param maxMemoryInMB Maximum memory in MB allocated to histogram aggregation. #' @param cacheNodeIds If FALSE, the algorithm will pass trees to executors to match instances with #' nodes. If TRUE, the algorithm will cache node IDs for each instance. 
Caching @@ -382,6 +384,8 @@ setMethod("write.ml", signature(object = "GBTClassificationModel", path = "chara #' @param minInstancesPerNode Minimum number of instances each child must have after split. #' @param minInfoGain Minimum information gain for a split to be considered at a tree node. #' @param checkpointInterval Param for set checkpoint interval (>= 1) or disable checkpoint (-1). +#' Note: this setting will be ignored if the checkpoint directory is not +#' set. #' @param maxMemoryInMB Maximum memory in MB allocated to histogram aggregation. #' @param cacheNodeIds If FALSE, the algorithm will pass trees to executors to match instances with #' nodes. If TRUE, the algorithm will cache node IDs for each instance. Caching @@ -595,6 +599,8 @@ setMethod("write.ml", signature(object = "RandomForestClassificationModel", path #' @param minInstancesPerNode Minimum number of instances each child must have after split. #' @param minInfoGain Minimum information gain for a split to be considered at a tree node. #' @param checkpointInterval Param for set checkpoint interval (>= 1) or disable checkpoint (-1). +#' Note: this setting will be ignored if the checkpoint directory is not +#' set. #' @param maxMemoryInMB Maximum memory in MB allocated to histogram aggregation. #' @param cacheNodeIds If FALSE, the algorithm will pass trees to executors to match instances with #' nodes. If TRUE, the algorithm will cache node IDs for each instance. Caching diff --git a/R/pkg/R/mllib_utils.R b/R/pkg/R/mllib_utils.R index a53c92c2c4815..23dda42c325be 100644 --- a/R/pkg/R/mllib_utils.R +++ b/R/pkg/R/mllib_utils.R @@ -130,3 +130,4 @@ read.ml <- function(path) { stop("Unsupported model: ", jobj) } } + diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index fb5f1d21fc723..965471f3b07a0 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -560,10 +560,55 @@ cancelJobGroup <- function(sc, groupId) { #'} #' @note setJobDescription since 2.3.0 setJobDescription <- function(value) { + if (!is.null(value)) { + value <- as.character(value) + } sc <- getSparkContext() invisible(callJMethod(sc, "setJobDescription", value)) } +#' Set a local property that affects jobs submitted from this thread, such as the +#' Spark fair scheduler pool. +#' +#' @param key The key for a local property. +#' @param value The value for a local property. +#' @rdname setLocalProperty +#' @name setLocalProperty +#' @examples +#'\dontrun{ +#' setLocalProperty("spark.scheduler.pool", "poolA") +#'} +#' @note setLocalProperty since 2.3.0 +setLocalProperty <- function(key, value) { + if (is.null(key) || is.na(key)) { + stop("key should not be NULL or NA.") + } + if (!is.null(value)) { + value <- as.character(value) + } + sc <- getSparkContext() + invisible(callJMethod(sc, "setLocalProperty", as.character(key), value)) +} + +#' Get a local property set in this thread, or \code{NULL} if it is missing. See +#' \code{setLocalProperty}. +#' +#' @param key The key for a local property. 
+#' @rdname getLocalProperty +#' @name getLocalProperty +#' @examples +#'\dontrun{ +#' getLocalProperty("spark.scheduler.pool") +#'} +#' @note getLocalProperty since 2.3.0 +getLocalProperty <- function(key) { + if (is.null(key) || is.na(key)) { + stop("key should not be NULL or NA.") + } + sc <- getSparkContext() + callJMethod(sc, "getLocalProperty", as.character(key)) +} + sparkConfToSubmitOps <- new.env() sparkConfToSubmitOps[["spark.driver.memory"]] <- "--driver-memory" sparkConfToSubmitOps[["spark.driver.extraClassPath"]] <- "--driver-class-path" diff --git a/R/pkg/tests/fulltests/test_Windows.R b/R/pkg/tests/fulltests/test_Windows.R index b2ec6c67311db..209827d9fdc2f 100644 --- a/R/pkg/tests/fulltests/test_Windows.R +++ b/R/pkg/tests/fulltests/test_Windows.R @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # + context("Windows-specific tests") test_that("sparkJars tag in SparkContext", { diff --git a/R/pkg/tests/fulltests/test_context.R b/R/pkg/tests/fulltests/test_context.R index 77635c5a256b9..f0d0a5114f89f 100644 --- a/R/pkg/tests/fulltests/test_context.R +++ b/R/pkg/tests/fulltests/test_context.R @@ -100,7 +100,6 @@ test_that("job group functions can be called", { setJobGroup("groupId", "job description", TRUE) cancelJobGroup("groupId") clearJobGroup() - setJobDescription("job description") suppressWarnings(setJobGroup(sc, "groupId", "job description", TRUE)) suppressWarnings(cancelJobGroup(sc, "groupId")) @@ -108,6 +107,38 @@ test_that("job group functions can be called", { sparkR.session.stop() }) +test_that("job description and local properties can be set and got", { + sc <- sparkR.sparkContext(master = sparkRTestMaster) + setJobDescription("job description") + expect_equal(getLocalProperty("spark.job.description"), "job description") + setJobDescription(1234) + expect_equal(getLocalProperty("spark.job.description"), "1234") + setJobDescription(NULL) + expect_equal(getLocalProperty("spark.job.description"), NULL) + setJobDescription(NA) + expect_equal(getLocalProperty("spark.job.description"), NULL) + + setLocalProperty("spark.scheduler.pool", "poolA") + expect_equal(getLocalProperty("spark.scheduler.pool"), "poolA") + setLocalProperty("spark.scheduler.pool", NULL) + expect_equal(getLocalProperty("spark.scheduler.pool"), NULL) + setLocalProperty("spark.scheduler.pool", NA) + expect_equal(getLocalProperty("spark.scheduler.pool"), NULL) + + setLocalProperty(4321, 1234) + expect_equal(getLocalProperty(4321), "1234") + setLocalProperty(4321, NULL) + expect_equal(getLocalProperty(4321), NULL) + setLocalProperty(4321, NA) + expect_equal(getLocalProperty(4321), NULL) + + expect_error(setLocalProperty(NULL, "should fail"), "key should not be NULL or NA") + expect_error(getLocalProperty(NULL), "key should not be NULL or NA") + expect_error(setLocalProperty(NA, "should fail"), "key should not be NULL or NA") + expect_error(getLocalProperty(NA), "key should not be NULL or NA") + sparkR.session.stop() +}) + test_that("utility function can be called", { sparkR.sparkContext(master = sparkRTestMaster) setLogLevel("ERROR") diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index d87f5d2705732..5197838eaac66 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -957,6 +957,28 @@ test_that("setCheckpointDir(), checkpoint() on a DataFrame", { } }) +test_that("localCheckpoint() on a DataFrame", { + if (windows_with_hadoop()) { + # Checkpoint 
directory shouldn't matter in localCheckpoint. + checkpointDir <- file.path(tempdir(), "lcproot") + expect_true(length(list.files(path = checkpointDir, all.files = TRUE, recursive = TRUE)) == 0) + setCheckpointDir(checkpointDir) + + textPath <- tempfile(pattern = "textPath", fileext = ".txt") + writeLines(mockLines, textPath) + # Read it lazily and then locally checkpoint eagerly. + df <- read.df(textPath, "text") + df <- localCheckpoint(df, eager = TRUE) + # Here, we remove the source path to check eagerness. + unlink(textPath) + expect_is(df, "SparkDataFrame") + expect_equal(colnames(df), c("value")) + expect_equal(count(df), 3) + + expect_true(length(list.files(path = checkpointDir, all.files = TRUE, recursive = TRUE)) == 0) + } +}) + test_that("schema(), dtypes(), columns(), names() return the correct values/format", { df <- read.json(jsonPath) testSchema <- schema(df) @@ -1405,7 +1427,7 @@ test_that("column functions", { c9 <- signum(c) + sin(c) + sinh(c) + size(c) + stddev(c) + soundex(c) + sqrt(c) + sum(c) c10 <- sumDistinct(c) + tan(c) + tanh(c) + toDegrees(c) + toRadians(c) c11 <- to_date(c) + trim(c) + unbase64(c) + unhex(c) + upper(c) - c12 <- variance(c) + c12 <- variance(c) + ltrim(c, "a") + rtrim(c, "b") + trim(c, "c") c13 <- lead("col", 1) + lead(c, 1) + lag("col", 1) + lag(c, 1) c14 <- cume_dist() + ntile(1) + corr(c, c1) c15 <- dense_rank() + percent_rank() + rank() + row_number() @@ -1418,6 +1440,8 @@ test_that("column functions", { c22 <- not(c) c23 <- trunc(c, "year") + trunc(c, "yyyy") + trunc(c, "yy") + trunc(c, "month") + trunc(c, "mon") + trunc(c, "mm") + c24 <- date_trunc("hour", c) + date_trunc("minute", c) + date_trunc("week", c) + + date_trunc("quarter", c) + current_date() + current_timestamp() # Test if base::is.nan() is exposed expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE)) @@ -1729,6 +1753,7 @@ test_that("date functions on a DataFrame", { expect_gt(collect(select(df2, unix_timestamp()))[1, 1], 0) expect_gt(collect(select(df2, unix_timestamp(df2$b)))[1, 1], 0) expect_gt(collect(select(df2, unix_timestamp(lit("2015-01-01"), "yyyy-MM-dd")))[1, 1], 0) + expect_equal(collect(select(df2, month(date_trunc("yyyy", df2$b))))[, 1], c(1, 1)) l3 <- list(list(a = 1000), list(a = -1000)) df3 <- createDataFrame(l3) @@ -2105,6 +2130,11 @@ test_that("arrange() and orderBy() on a DataFrame", { sorted7 <- arrange(df, "name", decreasing = FALSE) expect_equal(collect(sorted7)[2, "age"], 19) + + df <- createDataFrame(cars, numPartitions = 10) + expect_equal(getNumPartitions(df), 10) + sorted8 <- arrange(df, "dist", withinPartitions = TRUE) + expect_equal(collect(sorted8)[5:6, "dist"], c(22, 10)) }) test_that("filter() on a DataFrame", { diff --git a/R/pkg/tests/fulltests/test_streaming.R b/R/pkg/tests/fulltests/test_streaming.R index 54f40bbd5f517..a354d50c6b54e 100644 --- a/R/pkg/tests/fulltests/test_streaming.R +++ b/R/pkg/tests/fulltests/test_streaming.R @@ -172,6 +172,113 @@ test_that("Terminated by error", { stopQuery(q) }) +test_that("PartitionBy", { + parquetPath <- tempfile(pattern = "sparkr-test", fileext = ".parquet") + checkpointPath <- tempfile(pattern = "sparkr-test", fileext = ".checkpoint") + textPath <- tempfile(pattern = "sparkr-test", fileext = ".text") + df <- read.df(jsonPath, "json", stringSchema) + write.df(df, parquetPath, "parquet", "overwrite") + + df <- read.stream(path = parquetPath, schema = stringSchema) + + expect_error(write.stream(df, "json", path = textPath, checkpointLocation = "append", + partitionBy = c(1, 2)), + "All partitionBy column 
names should be characters") + + q <- write.stream(df, "json", path = textPath, checkpointLocation = "append", + partitionBy = "name") + awaitTermination(q, 5 * 1000) + callJMethod(q@ssq, "processAllAvailable") + + dirs <- list.files(textPath) + expect_equal(length(dirs[substring(dirs, 1, nchar("name=")) == "name="]), 3) + + unlink(checkpointPath) + unlink(textPath) + unlink(parquetPath) +}) + +test_that("Watermark", { + parquetPath <- tempfile(pattern = "sparkr-test", fileext = ".parquet") + schema <- structType(structField("value", "string")) + t <- Sys.time() + df <- as.DataFrame(lapply(list(t), as.character), schema) + write.df(df, parquetPath, "parquet", "append") + df <- read.stream(path = parquetPath, schema = "value STRING") + df <- withColumn(df, "eventTime", cast(df$value, "timestamp")) + df <- withWatermark(df, "eventTime", "10 seconds") + counts <- count(group_by(df, "eventTime")) + q <- write.stream(counts, "memory", queryName = "times", outputMode = "append") + + # first events + df <- as.DataFrame(lapply(list(t + 1, t, t + 2), as.character), schema) + write.df(df, parquetPath, "parquet", "append") + awaitTermination(q, 5 * 1000) + callJMethod(q@ssq, "processAllAvailable") + + # advance watermark to 15 + df <- as.DataFrame(lapply(list(t + 25), as.character), schema) + write.df(df, parquetPath, "parquet", "append") + awaitTermination(q, 5 * 1000) + callJMethod(q@ssq, "processAllAvailable") + + # old events, should be dropped + df <- as.DataFrame(lapply(list(t), as.character), schema) + write.df(df, parquetPath, "parquet", "append") + awaitTermination(q, 5 * 1000) + callJMethod(q@ssq, "processAllAvailable") + + # evict events less than previous watermark + df <- as.DataFrame(lapply(list(t + 25), as.character), schema) + write.df(df, parquetPath, "parquet", "append") + awaitTermination(q, 5 * 1000) + callJMethod(q@ssq, "processAllAvailable") + + times <- collect(sql("SELECT * FROM times")) + # looks like write timing can affect the first bucket; but it should be t + expect_equal(times[order(times$eventTime),][1, 2], 2) + + stopQuery(q) + unlink(parquetPath) +}) + +test_that("Trigger", { + parquetPath <- tempfile(pattern = "sparkr-test", fileext = ".parquet") + schema <- structType(structField("value", "string")) + df <- as.DataFrame(lapply(list(Sys.time()), as.character), schema) + write.df(df, parquetPath, "parquet", "append") + df <- read.stream(path = parquetPath, schema = "value STRING") + + expect_error(write.stream(df, "memory", queryName = "times", outputMode = "append", + trigger.processingTime = "", trigger.once = ""), "Multiple triggers not allowed.") + + expect_error(write.stream(df, "memory", queryName = "times", outputMode = "append", + trigger.processingTime = ""), + "Value for trigger.processingTime must be a non-empty string.") + + expect_error(write.stream(df, "memory", queryName = "times", outputMode = "append", + trigger.processingTime = "invalid"), "illegal argument") + + expect_error(write.stream(df, "memory", queryName = "times", outputMode = "append", + trigger.once = ""), "Value for trigger.once must be TRUE.") + + expect_error(write.stream(df, "memory", queryName = "times", outputMode = "append", + trigger.once = FALSE), "Value for trigger.once must be TRUE.") + + q <- write.stream(df, "memory", queryName = "times", outputMode = "append", trigger.once = TRUE) + awaitTermination(q, 5 * 1000) + callJMethod(q@ssq, "processAllAvailable") + df <- as.DataFrame(lapply(list(Sys.time()), as.character), schema) + write.df(df, parquetPath, "parquet", "append") + 
awaitTermination(q, 5 * 1000) + callJMethod(q@ssq, "processAllAvailable") + + expect_equal(nrow(collect(sql("SELECT * FROM times"))), 1) + + stopQuery(q) + unlink(parquetPath) +}) + unlink(jsonPath) unlink(jsonPathNa) diff --git a/R/pkg/tests/run-all.R b/R/pkg/tests/run-all.R index 63812ba70bb50..94d75188fb948 100644 --- a/R/pkg/tests/run-all.R +++ b/R/pkg/tests/run-all.R @@ -27,7 +27,10 @@ if (.Platform$OS.type == "windows") { # Setup global test environment # Install Spark first to set SPARK_HOME -install.spark() + +# NOTE(shivaram): We set overwrite to handle any old tar.gz files or directories left behind on +# CRAN machines. For Jenkins we should already have SPARK_HOME set. +install.spark(overwrite = TRUE) sparkRDir <- file.path(Sys.getenv("SPARK_HOME"), "R") sparkRWhitelistSQLDirs <- c("spark-warehouse", "metastore_db") diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 8c4ea2f2db188..feca617c2554c 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -391,8 +391,7 @@ We convert `mpg` to `kmpg` (kilometers per gallon). `carsSubDF` is a `SparkDataF ```{r} carsSubDF <- select(carsDF, "model", "mpg") -schema <- structType(structField("model", "string"), structField("mpg", "double"), - structField("kmpg", "double")) +schema <- "model STRING, mpg DOUBLE, kmpg DOUBLE" out <- dapply(carsSubDF, function(x) { x <- cbind(x, x$mpg * 1.61) }, schema) head(collect(out)) ``` @@ -1043,7 +1042,7 @@ unlink(modelPath) ## Structured Streaming -SparkR supports the Structured Streaming API (experimental). +SparkR supports the Structured Streaming API. You can check the Structured Streaming Programming Guide for [an introduction](https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html#programming-model) to its programming model and basic concepts. diff --git a/appveyor.yml b/appveyor.yml index 48740920cd09b..aee94c59612d2 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -42,7 +42,9 @@ install: # Install maven and dependencies - ps: .\dev\appveyor-install-dependencies.ps1 # Required package for R unit tests - - cmd: R -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'e1071', 'survival'), repos='http://cran.us.r-project.org')" + - cmd: R -e "install.packages(c('knitr', 'rmarkdown', 'devtools', 'e1071', 'survival'), repos='http://cran.us.r-project.org')" + # Here, we use the fixed version of testthat. For more details, please see SPARK-22817. + - cmd: R -e "devtools::install_version('testthat', version = '1.0.2', repos='http://cran.us.r-project.org')" - cmd: R -e "packageVersion('knitr'); packageVersion('rmarkdown'); packageVersion('testthat'); packageVersion('e1071'); packageVersion('survival')" build_script: diff --git a/assembly/pom.xml b/assembly/pom.xml index db19329fa8dec..810edf7f7c41d 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.3.0-SNAPSHOT + 2.4.0-SNAPSHOT ../pom.xml diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh new file mode 100755 index 0000000000000..071406336d1b1 --- /dev/null +++ b/bin/docker-image-tool.sh @@ -0,0 +1,145 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This script builds and pushes docker images when run from a release of Spark +# with Kubernetes support. + +function error { + echo "$@" 1>&2 + exit 1 +} + +if [ -z "${SPARK_HOME}" ]; then + SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)" +fi +. "${SPARK_HOME}/bin/load-spark-env.sh" + +function image_ref { + local image="$1" + local add_repo="${2:-1}" + if [ $add_repo = 1 ] && [ -n "$REPO" ]; then + image="$REPO/$image" + fi + if [ -n "$TAG" ]; then + image="$image:$TAG" + fi + echo "$image" +} + +function build { + local BUILD_ARGS + local IMG_PATH + + if [ ! -f "$SPARK_HOME/RELEASE" ]; then + # Set image build arguments accordingly if this is a source repo and not a distribution archive. + IMG_PATH=resource-managers/kubernetes/docker/src/main/dockerfiles + BUILD_ARGS=( + --build-arg + img_path=$IMG_PATH + --build-arg + spark_jars=assembly/target/scala-$SPARK_SCALA_VERSION/jars + ) + else + # Not passed as an argument to docker, but used to validate the Spark directory. + IMG_PATH="kubernetes/dockerfiles" + fi + + if [ ! -d "$IMG_PATH" ]; then + error "Cannot find docker image. This script must be run from a runnable distribution of Apache Spark." + fi + + docker build "${BUILD_ARGS[@]}" \ + -t $(image_ref spark) \ + -f "$IMG_PATH/spark/Dockerfile" . +} + +function push { + docker push "$(image_ref spark)" +} + +function usage { + cat <<EOF +Usage: $0 [options] [command] +Builds or pushes the built-in Spark Docker image. + +Commands: + build  Build image. Requires a repository address to be provided if the image will be pushed to a different registry. + push   Push a pre-built image to a registry. Requires a repository address to be provided. + +Options: + -r repo  Repository address. + -t tag   Tag to apply to the built image, or to identify the image to be pushed. + -m       Use minikube's Docker daemon. + +Using minikube when building images will do so directly into minikube's Docker daemon. +There is no need to push the images into minikube in that case, they'll be automatically +available when running applications inside the minikube cluster. +EOF +} + +if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then + usage + exit 0 +fi + +REPO= +TAG= +while getopts mr:t: option +do + case "${option}" + in + r) REPO=${OPTARG};; + t) TAG=${OPTARG};; + m) + if ! which minikube 1>/dev/null; then + error "Cannot find minikube." + fi + eval $(minikube docker-env) + ;; + esac +done + +case "${@: -1}" in + build) + build + ;; + push) + if [ -z "$REPO" ]; then + usage + exit 1 + fi + push + ;; + *) + usage + exit 1 + ;; +esac diff --git a/bin/find-spark-home b/bin/find-spark-home index fa78407d4175a..617dbaa4fff86 100755 --- a/bin/find-spark-home +++ b/bin/find-spark-home @@ -21,7 +21,7 @@ FIND_SPARK_HOME_PYTHON_SCRIPT="$(cd "$(dirname "$0")"; pwd)/find_spark_home.py" -# Short cirtuit if the user already has this set. +# Short circuit if the user already has this set. if [ ! -z "${SPARK_HOME}" ]; then exit 0 elif [ ! -f "$FIND_SPARK_HOME_PYTHON_SCRIPT" ]; then diff --git a/circle.yml b/circle.yml index 58bb67f0c11aa..00142165d6f8f 100644 --- a/circle.yml +++ b/circle.yml @@ -43,13 +43,13 @@ dependencies: # Copy contents into current build directory rsync --info=stats2,misc1,flist0 -a build_classes/ . fi - - ./build/mvn -DskipTests -Phadoop-cloud -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pyarn -Phive -Psparkr install + - set -o pipefail && ./build/mvn -DskipTests -Phadoop-cloud -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pyarn -Phive -Psparkr install | tee -a "$CIRCLE_ARTIFACTS/mvn-install.log" # Copy all of */target/scala_2.11/classes to build_classes/ - > rsync --info=stats2,misc1,flist0 -a --delete-excluded --prune-empty-dirs --exclude build_classes/ --exclude 'target/streams' --exclude 'assembly/target' --exclude 'common/network-yarn/target' --exclude 'examples/target' --exclude '***/*.jar' --include 'target/***' --include '**/' --exclude '*' .
build_classes/ - | # Make sbt fetch all the external deps to ~/.ivy2 so it gets cached - ./build/sbt -Phadoop-cloud -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pyarn -Phive -Psparkr externalDependencyClasspath + set -o pipefail && ./build/sbt -Phadoop-cloud -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pyarn -Phive -Psparkr externalDependencyClasspath | tee -a "$CIRCLE_ARTIFACTS/sbt-classpath.log" cache_directories: - "build_classes" - "build" diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index cf93d41cd77cf..8c148359c3029 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.3.0-SNAPSHOT + 2.4.0-SNAPSHOT ../../pom.xml diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDB.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDB.java index de2ef1625882d..904eeca35c2a4 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDB.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDB.java @@ -83,6 +83,7 @@ public LevelDB(File path, KVStoreSerializer serializer) throws Exception { if (versionData != null) { long version = serializer.deserializeLong(versionData); if (version != STORE_VERSION) { + close(); throw new UnsupportedStoreVersionException(); } } else { diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java index 93b0284a019bc..448ba78bd23d0 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java @@ -85,7 +85,7 @@ class LevelDBIterator implements KVStoreIterator { end = index.start(parent, params.last); } if (it.hasNext()) { - // When descending, the caller may have set up the start of iteration at a non-existant + // When descending, the caller may have set up the start of iteration at a non-existent // entry that is guaranteed to be after the desired entry. 
For example, if you have a // compound key (a, b) where b is a, integer, you may seek to the end of the elements that // have the same "a" value by specifying Integer.MAX_VALUE for "b", and that value may not diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 18cbdadd224ab..8ca7733507f1b 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.3.0-SNAPSHOT + 2.4.0-SNAPSHOT ../../pom.xml diff --git a/common/network-common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java b/common/network-common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java index ea9b3ce4e3522..8b8f9892847c3 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java +++ b/common/network-common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java @@ -18,12 +18,12 @@ package org.apache.spark.network.buffer; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; -import java.nio.file.Files; import java.nio.file.StandardOpenOption; import com.google.common.base.Objects; @@ -94,9 +94,9 @@ public ByteBuffer nioByteBuffer() throws IOException { @Override public InputStream createInputStream() throws IOException { - InputStream is = null; + FileInputStream is = null; try { - is = Files.newInputStream(file.toPath()); + is = new FileInputStream(file); ByteStreams.skipFully(is, offset); return new LimitedInputStream(is, length); } catch (IOException e) { diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java index 7376d1ddc4818..e04524dde0a75 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java +++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java @@ -30,10 +30,10 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.channel.*; -import io.netty.util.AbstractReferenceCounted; import org.apache.commons.crypto.stream.CryptoInputStream; import org.apache.commons.crypto.stream.CryptoOutputStream; +import org.apache.spark.network.util.AbstractFileRegion; import org.apache.spark.network.util.ByteArrayReadableChannel; import org.apache.spark.network.util.ByteArrayWritableChannel; @@ -161,7 +161,7 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception { } } - private static class EncryptedMessage extends AbstractReferenceCounted implements FileRegion { + private static class EncryptedMessage extends AbstractFileRegion { private final boolean isByteBuf; private final ByteBuf buf; private final FileRegion region; @@ -199,10 +199,45 @@ public long position() { } @Override - public long transfered() { + public long transferred() { return transferred; } + @Override + public EncryptedMessage touch(Object o) { + super.touch(o); + if (region != null) { + region.touch(o); + } + if (buf != null) { + buf.touch(o); + } + return this; + } + + @Override + public EncryptedMessage retain(int increment) { + super.retain(increment); + if (region != null) { + region.retain(increment); + } + if (buf != null) { + buf.retain(increment); + } + return this; + } + + @Override + public boolean release(int 
decrement) { + if (region != null) { + region.release(decrement); + } + if (buf != null) { + buf.release(decrement); + } + return super.release(decrement); + } + @Override public long transferTo(WritableByteChannel target, long position) throws IOException { Preconditions.checkArgument(position == transfered(), "Invalid position."); diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java index 4f8781b42a0e4..a5337656cbd84 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java @@ -25,17 +25,17 @@ import com.google.common.base.Preconditions; import io.netty.buffer.ByteBuf; import io.netty.channel.FileRegion; -import io.netty.util.AbstractReferenceCounted; import io.netty.util.ReferenceCountUtil; import org.apache.spark.network.buffer.ManagedBuffer; +import org.apache.spark.network.util.AbstractFileRegion; /** * A wrapper message that holds two separate pieces (a header and a body). * * The header must be a ByteBuf, while the body can be a ByteBuf or a FileRegion. */ -class MessageWithHeader extends AbstractReferenceCounted implements FileRegion { +class MessageWithHeader extends AbstractFileRegion { @Nullable private final ManagedBuffer managedBuffer; private final ByteBuf header; @@ -47,7 +47,7 @@ class MessageWithHeader extends AbstractReferenceCounted implements FileRegion { /** * When the write buffer size is larger than this limit, I/O will be done in chunks of this size. * The size should not be too large as it will waste underlying memory copy. e.g. If network - * avaliable buffer is smaller than this limit, the data cannot be sent within one single write + * available buffer is smaller than this limit, the data cannot be sent within one single write * operation while it still will make memory copy with this size. */ private static final int NIO_BUFFER_LIMIT = 256 * 1024; @@ -91,7 +91,7 @@ public long position() { } @Override - public long transfered() { + public long transferred() { return totalBytesTransferred; } @@ -100,7 +100,7 @@ public long transfered() { * transferTo invocations in order to transfer a single MessageWithHeader to avoid busy waiting. * * The contract is that the caller will ensure position is properly set to the total number - * of bytes transferred so far (i.e. value returned by transfered()). + * of bytes transferred so far (i.e. value returned by transferred()). 
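+ * A typical caller loop, sketched for illustration only (msg is this FileRegion, channel any
+ * WritableByteChannel):
+ *
+ *   long pos = msg.transferred();
+ *   while (pos < msg.count()) {
+ *     pos += msg.transferTo(channel, pos);
+ *   }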
*/ @Override public long transferTo(final WritableByteChannel target, final long position) throws IOException { @@ -160,4 +160,37 @@ private int writeNioBuffer( return ret; } + + @Override + public MessageWithHeader touch(Object o) { + super.touch(o); + header.touch(o); + ReferenceCountUtil.touch(body, o); + return this; + } + + @Override + public MessageWithHeader retain(int increment) { + super.retain(increment); + header.retain(increment); + ReferenceCountUtil.retain(body, increment); + if (managedBuffer != null) { + for (int i = 0; i < increment; i++) { + managedBuffer.retain(); + } + } + return this; + } + + @Override + public boolean release(int decrement) { + header.release(decrement); + ReferenceCountUtil.release(body, decrement); + if (managedBuffer != null) { + for (int i = 0; i < decrement; i++) { + managedBuffer.release(); + } + } + return super.release(decrement); + } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslEncryption.java b/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslEncryption.java index 3d71ebaa7ea0c..3ac9081d78a75 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslEncryption.java +++ b/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslEncryption.java @@ -32,13 +32,13 @@ import io.netty.channel.ChannelPromise; import io.netty.channel.FileRegion; import io.netty.handler.codec.MessageToMessageDecoder; -import io.netty.util.AbstractReferenceCounted; +import org.apache.spark.network.util.AbstractFileRegion; import org.apache.spark.network.util.ByteArrayWritableChannel; import org.apache.spark.network.util.NettyUtils; /** - * Provides SASL-based encription for transport channels. The single method exposed by this + * Provides SASL-based encryption for transport channels. The single method exposed by this * class installs the needed channel handlers on a connected channel. */ class SaslEncryption { @@ -129,7 +129,7 @@ protected void decode(ChannelHandlerContext ctx, ByteBuf msg, List out) } @VisibleForTesting - static class EncryptedMessage extends AbstractReferenceCounted implements FileRegion { + static class EncryptedMessage extends AbstractFileRegion { private final SaslEncryptionBackend backend; private final boolean isByteBuf; @@ -166,7 +166,7 @@ static class EncryptedMessage extends AbstractReferenceCounted implements FileRe * This makes assumptions about how netty treats FileRegion instances, because there's no way * to know beforehand what will be the size of the encrypted message. Namely, it assumes * that netty will try to transfer data from this message while - * transfered() < count(). So these two methods return, technically, wrong data, + * transferred() < count(). So these two methods return, technically, wrong data, * but netty doesn't know better. */ @Override @@ -183,10 +183,45 @@ public long position() { * Returns an approximation of the amount of data transferred. See {@link #count()}. 
*/ @Override - public long transfered() { + public long transferred() { return transferred; } + @Override + public EncryptedMessage touch(Object o) { + super.touch(o); + if (buf != null) { + buf.touch(o); + } + if (region != null) { + region.touch(o); + } + return this; + } + + @Override + public EncryptedMessage retain(int increment) { + super.retain(increment); + if (buf != null) { + buf.retain(increment); + } + if (region != null) { + region.retain(increment); + } + return this; + } + + @Override + public boolean release(int decrement) { + if (region != null) { + region.release(decrement); + } + if (buf != null) { + buf.release(decrement); + } + return super.release(decrement); + } + /** * Transfers data from the original message to the channel, encrypting it in the process. * diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/AbstractFileRegion.java b/common/network-common/src/main/java/org/apache/spark/network/util/AbstractFileRegion.java new file mode 100644 index 0000000000000..8651297d97ec2 --- /dev/null +++ b/common/network-common/src/main/java/org/apache/spark/network/util/AbstractFileRegion.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.util; + +import io.netty.channel.FileRegion; +import io.netty.util.AbstractReferenceCounted; + +public abstract class AbstractFileRegion extends AbstractReferenceCounted implements FileRegion { + + @Override + @SuppressWarnings("deprecation") + public final long transfered() { + return transferred(); + } + + @Override + public AbstractFileRegion retain() { + super.retain(); + return this; + } + + @Override + public AbstractFileRegion retain(int increment) { + super.retain(increment); + return this; + } + + @Override + public AbstractFileRegion touch() { + super.touch(); + return this; + } + + @Override + public AbstractFileRegion touch(Object o) { + return this; + } +} diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/TransportFrameDecoder.java b/common/network-common/src/main/java/org/apache/spark/network/util/TransportFrameDecoder.java index 50d9651ccbbb2..8e73ab077a5c1 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/TransportFrameDecoder.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/TransportFrameDecoder.java @@ -29,7 +29,7 @@ /** * A customized frame decoder that allows intercepting raw data. *

- * This behaves like Netty's frame decoder (with harcoded parameters that match this library's + * This behaves like Netty's frame decoder (with hard-coded parameters that match this library's + * needs), except it allows an interceptor to be installed to read data directly before it's * framed. *

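For context on the pattern introduced above: AbstractFileRegion exists to bridge Netty 4.1's rename of FileRegion.transfered() to transferred(). Subclasses implement only the new transferred() method, while the deprecated spelling is handled once, as a final method, in the base class. A minimal Scala sketch of a custom region built on it (the ByteBufferRegion class and its fields are hypothetical, for illustration only):

    // Hypothetical subclass of the new AbstractFileRegion base class; only the
    // Netty 4.1 method transferred() is implemented, while the deprecated
    // transfered() is inherited as a final bridge from AbstractFileRegion.
    import java.nio.ByteBuffer
    import java.nio.channels.WritableByteChannel

    import org.apache.spark.network.util.AbstractFileRegion

    class ByteBufferRegion(data: ByteBuffer) extends AbstractFileRegion {
      private var written = 0L

      override def position(): Long = 0L
      override def count(): Long = data.capacity().toLong
      override def transferred(): Long = written

      override def transferTo(target: WritableByteChannel, position: Long): Long = {
        data.position(position.toInt)
        val n = target.write(data).toLong
        written += n
        n
      }

      // Invoked by AbstractReferenceCounted once the reference count hits zero.
      override protected def deallocate(): Unit = ()
    }

This mirrors what MessageWithHeader and SaslEncryption's EncryptedMessage do in the diff above, minus the retain/release fan-out to their wrapped buffers.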
diff --git a/common/network-common/src/test/java/org/apache/spark/network/ProtocolSuite.java b/common/network-common/src/test/java/org/apache/spark/network/ProtocolSuite.java index bb1c40c4b0e06..bc94f7ca63a96 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/ProtocolSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/ProtocolSuite.java @@ -56,7 +56,7 @@ private void testServerToClient(Message msg) { NettyUtils.createFrameDecoder(), MessageDecoder.INSTANCE); while (!serverChannel.outboundMessages().isEmpty()) { - clientChannel.writeInbound(serverChannel.readOutbound()); + clientChannel.writeOneInbound(serverChannel.readOutbound()); } assertEquals(1, clientChannel.inboundMessages().size()); @@ -72,7 +72,7 @@ private void testClientToServer(Message msg) { NettyUtils.createFrameDecoder(), MessageDecoder.INSTANCE); while (!clientChannel.outboundMessages().isEmpty()) { - serverChannel.writeInbound(clientChannel.readOutbound()); + serverChannel.writeOneInbound(clientChannel.readOutbound()); } assertEquals(1, serverChannel.inboundMessages().size()); diff --git a/common/network-common/src/test/java/org/apache/spark/network/protocol/MessageWithHeaderSuite.java b/common/network-common/src/test/java/org/apache/spark/network/protocol/MessageWithHeaderSuite.java index b341c5681e00c..ecb66fcf2ff76 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/protocol/MessageWithHeaderSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/protocol/MessageWithHeaderSuite.java @@ -23,8 +23,7 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; -import io.netty.channel.FileRegion; -import io.netty.util.AbstractReferenceCounted; +import org.apache.spark.network.util.AbstractFileRegion; import org.junit.Test; import org.mockito.Mockito; @@ -108,7 +107,7 @@ private ByteBuf doWrite(MessageWithHeader msg, int minExpectedWrites) throws Exc return Unpooled.wrappedBuffer(channel.getData()); } - private static class TestFileRegion extends AbstractReferenceCounted implements FileRegion { + private static class TestFileRegion extends AbstractFileRegion { private final int writeCount; private final int writesPerCall; @@ -130,7 +129,7 @@ public long position() { } @Override - public long transfered() { + public long transferred() { return 8 * written; } diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 9968480ab7658..05335df61a664 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.3.0-SNAPSHOT + 2.4.0-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java index 3f2f20b4149f1..9cac7d00cc6b6 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java @@ -18,11 +18,11 @@ package org.apache.spark.network.shuffle; import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.Channels; import java.nio.channels.WritableByteChannel; -import java.nio.file.Files; import java.util.Arrays; import org.slf4j.Logger; @@ -165,7 +165,7 @@ private class DownloadCallback implements StreamCallback { DownloadCallback(int 
chunkIndex) throws IOException { this.targetFile = tempFileManager.createTempFile(); - this.channel = Channels.newChannel(Files.newOutputStream(targetFile.toPath())); + this.channel = Channels.newChannel(new FileOutputStream(targetFile)); this.chunkIndex = chunkIndex; } diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java index 23438a08fa094..6d201b8fe8d7d 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java @@ -127,7 +127,7 @@ public void jsonSerializationOfExecutorRegistration() throws IOException { mapper.readValue(shuffleJson, ExecutorShuffleInfo.class); assertEquals(parsedShuffleInfo, shuffleInfo); - // Intentionally keep these hard-coded strings in here, to check backwards-compatability. + // Intentionally keep these hard-coded strings in here, to check backwards-compatibility. // its not legacy yet, but keeping this here in case anybody changes it String legacyAppIdJson = "{\"appId\":\"foo\", \"execId\":\"bar\"}"; assertEquals(appId, mapper.readValue(legacyAppIdJson, AppExecId.class)); diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index ec2db6e5bb88c..564e6583c909e 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.3.0-SNAPSHOT + 2.4.0-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 2d59c71cc3757..2f04abe8c7e88 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.3.0-SNAPSHOT + 2.4.0-SNAPSHOT ../../pom.xml diff --git a/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilter.java b/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilter.java index c0b425e729595..37803c7a3b104 100644 --- a/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilter.java +++ b/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilter.java @@ -34,7 +34,7 @@ *

 *   <li>{@link String}</li>
  • * * The false positive probability ({@code FPP}) of a Bloom filter is defined as the probability that - * {@linkplain #mightContain(Object)} will erroneously return {@code true} for an object that hasu + * {@linkplain #mightContain(Object)} will erroneously return {@code true} for an object that has * not actually been put in the {@code BloomFilter}. * * The implementation is largely based on the {@code BloomFilter} class from Guava. diff --git a/common/tags/pom.xml b/common/tags/pom.xml index f7e586ee777e1..ba127408e1c59 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.3.0-SNAPSHOT + 2.4.0-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index a3772a2620088..1527854730394 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.3.0-SNAPSHOT + 2.4.0-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java index f121b1cd745b8..a6b1f7a16d605 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java @@ -66,7 +66,7 @@ public static boolean arrayEquals( i += 1; } } - // for architectures that suport unaligned accesses, chew it up 8 bytes at a time + // for architectures that support unaligned accesses, chew it up 8 bytes at a time if (unaligned || (((leftOffset + i) % 8 == 0) && ((rightOffset + i) % 8 == 0))) { while (i <= length - 8) { if (Platform.getLong(leftBase, leftOffset + i) != diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java index cc9cc429643ad..a9603c1aba051 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java @@ -31,8 +31,7 @@ public class HeapMemoryAllocator implements MemoryAllocator { @GuardedBy("this") - private final Map>> bufferPoolsBySize = - new HashMap<>(); + private final Map>> bufferPoolsBySize = new HashMap<>(); private static final int POOLING_THRESHOLD_BYTES = 1024 * 1024; @@ -49,13 +48,14 @@ private boolean shouldPool(long size) { public MemoryBlock allocate(long size) throws OutOfMemoryError { if (shouldPool(size)) { synchronized (this) { - final LinkedList> pool = bufferPoolsBySize.get(size); + final LinkedList> pool = bufferPoolsBySize.get(size); if (pool != null) { while (!pool.isEmpty()) { - final WeakReference blockReference = pool.pop(); - final MemoryBlock memory = blockReference.get(); - if (memory != null) { - assert (memory.size() == size); + final WeakReference arrayReference = pool.pop(); + final long[] array = arrayReference.get(); + if (array != null) { + assert (array.length * 8L >= size); + MemoryBlock memory = new MemoryBlock(array, Platform.LONG_ARRAY_OFFSET, size); if (MemoryAllocator.MEMORY_DEBUG_FILL_ENABLED) { memory.fill(MemoryAllocator.MEMORY_DEBUG_FILL_CLEAN_VALUE); } @@ -76,18 +76,36 @@ public MemoryBlock allocate(long size) throws OutOfMemoryError { @Override public void free(MemoryBlock memory) { + assert (memory.obj != null) : + "baseObject was null; are you trying to use the on-heap allocator to free off-heap memory?"; + assert (memory.pageNumber != 
MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) : + "page has already been freed"; + assert ((memory.pageNumber == MemoryBlock.NO_PAGE_NUMBER) + || (memory.pageNumber == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER)) : + "TMM-allocated pages must first be freed via TMM.freePage(), not directly in allocator " + + "free()"; + final long size = memory.size(); if (MemoryAllocator.MEMORY_DEBUG_FILL_ENABLED) { memory.fill(MemoryAllocator.MEMORY_DEBUG_FILL_FREED_VALUE); } + + // Mark the page as freed (so we can detect double-frees). + memory.pageNumber = MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER; + + // As an additional layer of defense against use-after-free bugs, we mutate the + // MemoryBlock to null out its reference to the long[] array. + long[] array = (long[]) memory.obj; + memory.setObjAndOffset(null, 0); + if (shouldPool(size)) { synchronized (this) { - LinkedList> pool = bufferPoolsBySize.get(size); + LinkedList> pool = bufferPoolsBySize.get(size); if (pool == null) { pool = new LinkedList<>(); bufferPoolsBySize.put(size, pool); } - pool.add(new WeakReference<>(memory)); + pool.add(new WeakReference<>(array)); } } else { // Do nothing diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryBlock.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryBlock.java index cd1d378bc1470..c333857358d30 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryBlock.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryBlock.java @@ -26,6 +26,25 @@ */ public class MemoryBlock extends MemoryLocation { + /** Special `pageNumber` value for pages which were not allocated by TaskMemoryManagers */ + public static final int NO_PAGE_NUMBER = -1; + + /** + * Special `pageNumber` value for marking pages that have been freed in the TaskMemoryManager. + * We set `pageNumber` to this value in TaskMemoryManager.freePage() so that MemoryAllocator + * can detect if pages which were allocated by TaskMemoryManager have been freed in the TMM + * before being passed to MemoryAllocator.free() (it is an error to allocate a page in + * TaskMemoryManager and then directly free it in a MemoryAllocator without going through + * the TMM freePage() call). + */ + public static final int FREED_IN_TMM_PAGE_NUMBER = -2; + + /** + * Special `pageNumber` value for pages that have been freed by the MemoryAllocator. This allows + * us to detect double-frees. + */ + public static final int FREED_IN_ALLOCATOR_PAGE_NUMBER = -3; + private final long length; /** @@ -33,7 +52,7 @@ public class MemoryBlock extends MemoryLocation { * TaskMemoryManager. This field is public so that it can be modified by the TaskMemoryManager, * which lives in a different package. 
*/ - public int pageNumber = -1; + public int pageNumber = NO_PAGE_NUMBER; public MemoryBlock(@Nullable Object obj, long offset, long length) { super(obj, offset); diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/UnsafeMemoryAllocator.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/UnsafeMemoryAllocator.java index 55bcdf1ed7b06..4368fb615ba1e 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/UnsafeMemoryAllocator.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/UnsafeMemoryAllocator.java @@ -38,9 +38,20 @@ public MemoryBlock allocate(long size) throws OutOfMemoryError { public void free(MemoryBlock memory) { assert (memory.obj == null) : "baseObject not null; are you trying to use the off-heap allocator to free on-heap memory?"; + assert (memory.pageNumber != MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER) : + "page has already been freed"; + assert ((memory.pageNumber == MemoryBlock.NO_PAGE_NUMBER) + || (memory.pageNumber == MemoryBlock.FREED_IN_TMM_PAGE_NUMBER)) : + "TMM-allocated pages must be freed via TMM.freePage(), not directly in allocator free()"; + if (MemoryAllocator.MEMORY_DEBUG_FILL_ENABLED) { memory.fill(MemoryAllocator.MEMORY_DEBUG_FILL_FREED_VALUE); } Platform.freeMemory(memory.offset); + // As an additional layer of defense against use-after-free bugs, we mutate the + // MemoryBlock to reset its pointer. + memory.offset = 0; + // Mark the page as freed (so we can detect double-frees). + memory.pageNumber = MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER; } } diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java index 7ced13d357237..c03caf0076f61 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java @@ -74,4 +74,29 @@ public static byte[] subStringSQL(byte[] bytes, int pos, int len) { } return Arrays.copyOfRange(bytes, start, end); } + + public static byte[] concat(byte[]... 
inputs) { + // Compute the total length of the result + int totalLength = 0; + for (int i = 0; i < inputs.length; i++) { + if (inputs[i] != null) { + totalLength += inputs[i].length; + } else { + return null; + } + } + + // Allocate a new byte array, and copy the inputs one by one into it + final byte[] result = new byte[totalLength]; + int offset = 0; + for (int i = 0; i < inputs.length; i++) { + int len = inputs[i].length; + Platform.copyMemory( + inputs[i], Platform.BYTE_ARRAY_OFFSET, + result, Platform.BYTE_ARRAY_OFFSET + offset, + len); + offset += len; + } + return result; + } } diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java index 4b141339ec816..62854837b05ed 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java @@ -62,6 +62,52 @@ public void overlappingCopyMemory() { } } + @Test + public void onHeapMemoryAllocatorPoolingReUsesLongArrays() { + MemoryBlock block1 = MemoryAllocator.HEAP.allocate(1024 * 1024); + Object baseObject1 = block1.getBaseObject(); + MemoryAllocator.HEAP.free(block1); + MemoryBlock block2 = MemoryAllocator.HEAP.allocate(1024 * 1024); + Object baseObject2 = block2.getBaseObject(); + Assert.assertSame(baseObject1, baseObject2); + MemoryAllocator.HEAP.free(block2); + } + + @Test + public void freeingOnHeapMemoryBlockResetsBaseObjectAndOffset() { + MemoryBlock block = MemoryAllocator.HEAP.allocate(1024); + Assert.assertNotNull(block.getBaseObject()); + MemoryAllocator.HEAP.free(block); + Assert.assertNull(block.getBaseObject()); + Assert.assertEquals(0, block.getBaseOffset()); + Assert.assertEquals(MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER, block.pageNumber); + } + + @Test + public void freeingOffHeapMemoryBlockResetsOffset() { + MemoryBlock block = MemoryAllocator.UNSAFE.allocate(1024); + Assert.assertNull(block.getBaseObject()); + Assert.assertNotEquals(0, block.getBaseOffset()); + MemoryAllocator.UNSAFE.free(block); + Assert.assertNull(block.getBaseObject()); + Assert.assertEquals(0, block.getBaseOffset()); + Assert.assertEquals(MemoryBlock.FREED_IN_ALLOCATOR_PAGE_NUMBER, block.pageNumber); + } + + @Test(expected = AssertionError.class) + public void onHeapMemoryAllocatorThrowsAssertionErrorOnDoubleFree() { + MemoryBlock block = MemoryAllocator.HEAP.allocate(1024); + MemoryAllocator.HEAP.free(block); + MemoryAllocator.HEAP.free(block); + } + + @Test(expected = AssertionError.class) + public void offHeapMemoryAllocatorThrowsAssertionErrorOnDoubleFree() { + MemoryBlock block = MemoryAllocator.UNSAFE.allocate(1024); + MemoryAllocator.UNSAFE.free(block); + MemoryAllocator.UNSAFE.free(block); + } + @Test public void memoryDebugFillEnabledInTest() { Assert.assertTrue(MemoryAllocator.MEMORY_DEBUG_FILL_ENABLED); @@ -71,9 +117,11 @@ public void memoryDebugFillEnabledInTest() { MemoryAllocator.MEMORY_DEBUG_FILL_CLEAN_VALUE); MemoryBlock onheap1 = MemoryAllocator.HEAP.allocate(1024 * 1024); + Object onheap1BaseObject = onheap1.getBaseObject(); + long onheap1BaseOffset = onheap1.getBaseOffset(); MemoryAllocator.HEAP.free(onheap1); Assert.assertEquals( - Platform.getByte(onheap1.getBaseObject(), onheap1.getBaseOffset()), + Platform.getByte(onheap1BaseObject, onheap1BaseOffset), MemoryAllocator.MEMORY_DEBUG_FILL_FREED_VALUE); MemoryBlock onheap2 = MemoryAllocator.HEAP.allocate(1024 * 1024); Assert.assertEquals( diff --git a/core/pom.xml 
b/core/pom.xml index fc394d2f0feaa..c7925ea71f10b 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.3.0-SNAPSHOT + 2.4.0-SNAPSHOT ../pom.xml @@ -355,6 +355,14 @@ spark-tags_${scala.binary.version} + + org.apache.spark + spark-launcher_${scala.binary.version} + ${project.version} + tests + test + + 1.11.76 @@ -158,8 +158,8 @@ 1.9.3 1.2 - 4.5.3 - 4.4.6 + 4.5.4 + 4.4.8 3.1 3.4.1 @@ -184,7 +184,7 @@ 3.2.10 1.1.1 2.5.1 - 3.0.0 + 3.0.8 2.25.1 2.9.9 3.5.2 @@ -200,7 +200,7 @@ 2.8 1.8 1.0.0 - 0.4.0 + 0.8.0 ${java.home} @@ -411,6 +411,7 @@ org.bouncycastle bcprov-jdk15on + ${bouncycastle.version} jline @@ -2240,6 +2242,14 @@ com.fasterxml.jackson.core jackson-databind + + io.netty + netty-buffer + + + io.netty + netty-common + io.netty netty-handler @@ -2555,6 +2565,9 @@ org.apache.maven.plugins maven-assembly-plugin 3.1.0 + + posix + org.apache.maven.plugins diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala index 2ef0e7b40d940..adde213e361f0 100644 --- a/project/MimaBuild.scala +++ b/project/MimaBuild.scala @@ -88,7 +88,7 @@ object MimaBuild { def mimaSettings(sparkHome: File, projectRef: ProjectRef) = { val organization = "org.apache.spark" - val previousSparkVersion = "2.0.0" + val previousSparkVersion = "2.2.0" val project = projectRef.project val fullId = "spark-" + project + "_2.11" mimaDefaultSettings ++ diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 965d1e44ac894..7d75ef8ddb7a8 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -34,8 +34,33 @@ import com.typesafe.tools.mima.core.ProblemFilters._ */ object MimaExcludes { + // Exclude rules for 2.4.x + lazy val v24excludes = v23excludes ++ Seq( + // Converted from case object to case class + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages#RetrieveSparkAppConfig.productElement"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages#RetrieveSparkAppConfig.productArity"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages#RetrieveSparkAppConfig.canEqual"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages#RetrieveSparkAppConfig.productIterator"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages#RetrieveSparkAppConfig.productPrefix"), + ProblemFilters.exclude[FinalMethodProblem]("org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages#RetrieveSparkAppConfig.toString") + ) + // Exclude rules for 2.3.x lazy val v23excludes = v22excludes ++ Seq( + // [SPARK-22897] Expose stageAttemptId in TaskContext + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.TaskContext.stageAttemptNumber"), + + // SPARK-22789: Map-only continuous processing execution + ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.streaming.StreamingQueryManager.startQuery$default$8"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.StreamingQueryManager.startQuery$default$6"), + ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.streaming.StreamingQueryManager.startQuery$default$9"), + + // SPARK-22372: Make cluster submission use SparkApplication. 
+ ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.SparkHadoopUtil.getSecretKeyFromUserCredentials"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.SparkHadoopUtil.isYarnMode"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.SparkHadoopUtil.getCurrentUserCredentials"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.SparkHadoopUtil.addSecretKeyToUserCredentials"), + // SPARK-18085: Better History Server scalability for many / large applications ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.status.api.v1.ExecutorSummary.executorLogs"), ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.history.HistoryServer.getSparkUI"), @@ -45,6 +70,8 @@ object MimaExcludes { ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.storage.StorageStatusListener"), ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.status.api.v1.ExecutorStageSummary.this"), ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.status.api.v1.JobData.this"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.SparkStatusTracker.this"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.ui.jobs.JobProgressListener"), // [SPARK-20495][SQL] Add StorageLevel to cacheTable API ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.cacheTable"), @@ -82,7 +109,40 @@ object MimaExcludes { // [SPARK-21087] CrossValidator, TrainValidationSplit expose sub models after fitting: Scala ProblemFilters.exclude[FinalClassProblem]("org.apache.spark.ml.tuning.CrossValidatorModel$CrossValidatorModelWriter"), - ProblemFilters.exclude[FinalClassProblem]("org.apache.spark.ml.tuning.TrainValidationSplitModel$TrainValidationSplitModelWriter") + ProblemFilters.exclude[FinalClassProblem]("org.apache.spark.ml.tuning.TrainValidationSplitModel$TrainValidationSplitModelWriter"), + + // [SPARK-21728][CORE] Allow SparkSubmit to use Logging + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.SparkSubmit.downloadFileList"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.SparkSubmit.downloadFile"), + + // [SPARK-21714][CORE][YARN] Avoiding re-uploading remote resources in yarn client mode + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.SparkSubmit.prepareSubmitEnvironment"), + + // [SPARK-22324][SQL][PYTHON] Upgrade Arrow to 0.8.0 + ProblemFilters.exclude[FinalMethodProblem]("org.apache.spark.network.util.AbstractFileRegion.transfered"), + + // [SPARK-20643][CORE] Add listener implementation to collect app state + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.status.api.v1.TaskData.$default$5"), + + // [SPARK-20648][CORE] Port JobsTab and StageTab to the new UI backend + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.status.api.v1.TaskData.$default$12"), + + // [SPARK-21462][SS] Added batchId to StreamingQueryProgress.json + // [SPARK-21409][SS] Expose state store memory usage in SQL metrics and progress updates + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.StateOperatorProgress.this"), + + // [SPARK-22278][SS] Expose current event time watermark and current processing time in GroupState + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.streaming.GroupState.getCurrentWatermarkMs"), + 
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.streaming.GroupState.getCurrentProcessingTimeMs"), + + // [SPARK-20542][ML][SQL] Add an API to Bucketizer that can bin multiple columns + ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.param.shared.HasOutputCols.org$apache$spark$ml$param$shared$HasOutputCols$_setter_$outputCols_="), + + // [SPARK-18619][ML] Make QuantileDiscretizer/Bucketizer/StringIndexer/RFormula inherit from HasHandleInvalid + ProblemFilters.exclude[FinalMethodProblem]("org.apache.spark.ml.feature.Bucketizer.getHandleInvalid"), + ProblemFilters.exclude[FinalMethodProblem]("org.apache.spark.ml.feature.StringIndexer.getHandleInvalid"), + ProblemFilters.exclude[FinalMethodProblem]("org.apache.spark.ml.feature.QuantileDiscretizer.getHandleInvalid"), + ProblemFilters.exclude[FinalMethodProblem]("org.apache.spark.ml.feature.StringIndexerModel.getHandleInvalid") ) // Exclude rules for 2.2.x @@ -1060,10 +1120,16 @@ object MimaExcludes { // [SPARK-21680][ML][MLLIB]optimzie Vector coompress ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.mllib.linalg.Vector.toSparseWithSize"), ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Vector.toSparseWithSize") + ) ++ Seq( + // [SPARK-3181][ML]Implement huber loss for LinearRegression. + ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.param.shared.HasLoss.org$apache$spark$ml$param$shared$HasLoss$_setter_$loss_="), + ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.param.shared.HasLoss.getLoss"), + ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.param.shared.HasLoss.loss") ) } def excludes(version: String) = version match { + case v if v.startsWith("2.4") => v24excludes case v if v.startsWith("2.3") => v23excludes case v if v.startsWith("2.2") => v22excludes case v if v.startsWith("2.1") => v21excludes diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index cf000e5751e2a..a0ae82639034e 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -54,16 +54,15 @@ object BuildCommons { "tags", "sketch", "kvstore" ).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects - val optionallyEnabledProjects@Seq(mesos, yarn, + val optionallyEnabledProjects@Seq(kubernetes, mesos, yarn, streamingFlumeSink, streamingFlume, streamingKafka, sparkGangliaLgpl, streamingKinesisAsl, - dockerIntegrationTests, hadoopCloud, - kubernetes, _*) = - Seq("mesos", "yarn", + dockerIntegrationTests, hadoopCloud, _*) = + Seq("kubernetes", "mesos", "yarn", "streaming-flume-sink", "streaming-flume", "streaming-kafka-0-8", "ganglia-lgpl", "streaming-kinesis-asl", "docker-integration-tests", "hadoop-cloud", - "kubernetes", "kubernetes-integration-tests", + "kubernetes-integration-tests", "kubernetes-integration-tests-spark-jobs", "kubernetes-integration-tests-spark-jobs-helpers", "kubernetes-docker-minimal-bundle" ).map(ProjectRef(buildLocation, _)) @@ -247,14 +246,14 @@ object SparkBuild extends PomBuild { javacOptions in Compile ++= Seq( "-encoding", "UTF-8", - "-source", javacJVMVersion.value, - "-Xlint:unchecked" + "-source", javacJVMVersion.value ), - // This -target option cannot be set in the Compile configuration scope since `javadoc` doesn't - // play nicely with it; see https://github.com/sbt/sbt/issues/355#issuecomment-3817629 for - // additional discussion and explanation. 
+ // This -target and Xlint:unchecked options cannot be set in the Compile configuration scope since + // `javadoc` doesn't play nicely with them; see https://github.com/sbt/sbt/issues/355#issuecomment-3817629 + // for additional discussion and explanation. javacOptions in (Compile, compile) ++= Seq( - "-target", javacJVMVersion.value + "-target", javacJVMVersion.value, + "-Xlint:unchecked" ), scalacOptions in Compile ++= Seq( @@ -262,6 +261,21 @@ object SparkBuild extends PomBuild { "-sourcepath", (baseDirectory in ThisBuild).value.getAbsolutePath // Required for relative source links in scaladoc ), + // Remove certain packages from Scaladoc + scalacOptions in (Compile, doc) := Seq( + "-groups", + "-skip-packages", Seq( + "org.apache.spark.api.python", + "org.apache.spark.network", + "org.apache.spark.deploy", + "org.apache.spark.util.collection" + ).mkString(":"), + "-doc-title", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " ScalaDoc" + ) ++ { + // Do not attempt to scaladoc javadoc comments under 2.12 since it can't handle inner classes + if (scalaBinaryVersion.value == "2.12") Seq("-no-java-comments") else Seq.empty + }, + // Implements -Xfatal-warnings, ignoring deprecation warnings. // Code snippet taken from https://issues.scala-lang.org/browse/SI-8410. compile in Compile := { @@ -688,9 +702,9 @@ object Unidoc { publish := {}, unidocProjectFilter in(ScalaUnidoc, unidoc) := - inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, streamingFlumeSink, yarn, tags, streamingKafka010, sqlKafka010), + inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, streamingFlumeSink, kubernetes, yarn, tags, streamingKafka010, sqlKafka010), unidocProjectFilter in(JavaUnidoc, unidoc) := - inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, streamingFlumeSink, yarn, tags, streamingKafka010, sqlKafka010), + inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, streamingFlumeSink, kubernetes, yarn, tags, streamingKafka010, sqlKafka010), unidocAllClasspaths in (ScalaUnidoc, unidoc) := { ignoreClasspaths((unidocAllClasspaths in (ScalaUnidoc, unidoc)).value) @@ -845,18 +859,7 @@ object TestSettings { } Seq.empty[File] }).value, - concurrentRestrictions in Global += Tags.limit(Tags.Test, 1), - // Remove certain packages from Scaladoc - scalacOptions in (Compile, doc) := Seq( - "-groups", - "-skip-packages", Seq( - "org.apache.spark.api.python", - "org.apache.spark.network", - "org.apache.spark.deploy", - "org.apache.spark.util.collection" - ).mkString(":"), - "-doc-title", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " ScalaDoc" - ) + concurrentRestrictions in Global += Tags.limit(Tags.Test, 1) ) } diff --git a/python/README.md b/python/README.md index 84ec88141cb00..3f17fdb98a081 100644 --- a/python/README.md +++ b/python/README.md @@ -29,4 +29,4 @@ The Python packaging for Spark is not intended to replace all of the other use c ## Python Requirements -At its core PySpark depends on Py4J (currently version 0.10.6), but additional sub-packages have their own requirements (including numpy and pandas). +At its core PySpark depends on Py4J (currently version 0.10.6), but some additional sub-packages have their own extra requirements for some features (including numpy, pandas, and pyarrow). 
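Stepping back to the MimaExcludes changes above: the file follows one fixed pattern, where each release's exclude list chains onto the previous release's list and excludes(version) dispatches on the version prefix. A condensed sketch of that pattern, using two rules copied from the diff purely as placeholders:

    // Condensed sketch of the exclude-chaining pattern in MimaExcludes.scala;
    // the real v23/v24 lists are much longer than the two rules shown here.
    import com.typesafe.tools.mima.core._

    object ExcludesSketch {
      lazy val v23excludes = Seq(
        ProblemFilters.exclude[ReversedMissingMethodProblem](
          "org.apache.spark.TaskContext.stageAttemptNumber"))

      // Each release inherits every older rule and appends its own.
      lazy val v24excludes = v23excludes ++ Seq(
        ProblemFilters.exclude[FinalMethodProblem](
          "org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages#RetrieveSparkAppConfig.toString"))

      def excludes(version: String) = version match {
        case v if v.startsWith("2.4") => v24excludes
        case v if v.startsWith("2.3") => v23excludes
        case _ => Seq.empty
      }
    }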
diff --git a/python/pyspark/ml/base.py b/python/pyspark/ml/base.py index a6767cee9bf28..d4470b5bf2900 100644 --- a/python/pyspark/ml/base.py +++ b/python/pyspark/ml/base.py @@ -18,13 +18,52 @@ from abc import ABCMeta, abstractmethod import copy +import threading from pyspark import since -from pyspark.ml.param import Params from pyspark.ml.param.shared import * from pyspark.ml.common import inherit_doc from pyspark.sql.functions import udf -from pyspark.sql.types import StructField, StructType, DoubleType +from pyspark.sql.types import StructField, StructType + + +class _FitMultipleIterator(object): + """ + Used by default implementation of Estimator.fitMultiple to produce models in a thread safe + iterator. This class handles the simple case of fitMultiple where each param map should be + fit independently. + + :param fitSingleModel: Function: (int => Model) which fits an estimator to a dataset. + `fitSingleModel` may be called up to `numModels` times, with a unique index each time. + Each call to `fitSingleModel` with an index should return the Model associated with + that index. + :param numModel: Number of models this iterator should produce. + + See Estimator.fitMultiple for more info. + """ + def __init__(self, fitSingleModel, numModels): + """ + + """ + self.fitSingleModel = fitSingleModel + self.numModel = numModels + self.counter = 0 + self.lock = threading.Lock() + + def __iter__(self): + return self + + def __next__(self): + with self.lock: + index = self.counter + if index >= self.numModel: + raise StopIteration("No models remaining.") + self.counter += 1 + return index, self.fitSingleModel(index) + + def next(self): + """For python2 compatibility.""" + return self.__next__() @inherit_doc @@ -47,6 +86,27 @@ def _fit(self, dataset): """ raise NotImplementedError() + @since("2.3.0") + def fitMultiple(self, dataset, paramMaps): + """ + Fits a model to the input dataset for each param map in `paramMaps`. + + :param dataset: input dataset, which is an instance of :py:class:`pyspark.sql.DataFrame`. + :param paramMaps: A Sequence of param maps. + :return: A thread safe iterable which contains one model for each param map. Each + call to `next(modelIterator)` will return `(index, model)` where model was fit + using `paramMaps[index]`. `index` values may not be sequential. + + .. note:: DeveloperApi + .. 
note:: Experimental + """ + estimator = self.copy() + + def fitSingleModel(index): + return estimator.fit(dataset, paramMaps[index]) + + return _FitMultipleIterator(fitSingleModel, len(paramMaps)) + @since("1.3.0") def fit(self, dataset, params=None): """ @@ -61,7 +121,10 @@ def fit(self, dataset, params=None): if params is None: params = dict() if isinstance(params, (list, tuple)): - return [self.fit(dataset, paramMap) for paramMap in params] + models = [None] * len(params) + for index, model in self.fitMultiple(dataset, params): + models[index] = model + return models elif isinstance(params, dict): if params: return self.copy(params)._fit(dataset) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 608f2a5715497..eb79b193103e2 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -45,6 +45,7 @@ 'NGram', 'Normalizer', 'OneHotEncoder', + 'OneHotEncoderEstimator', 'OneHotEncoderModel', 'PCA', 'PCAModel', 'PolynomialExpansion', 'QuantileDiscretizer', @@ -57,6 +58,7 @@ 'Tokenizer', 'VectorAssembler', 'VectorIndexer', 'VectorIndexerModel', + 'VectorSizeHint', 'VectorSlicer', 'Word2Vec', 'Word2VecModel'] @@ -713,9 +715,9 @@ class FeatureHasher(JavaTransformer, HasInputCols, HasOutputCol, HasNumFeatures, * Numeric columns: For numeric features, the hash value of the column name is used to map the - feature value to its index in the feature vector. Numeric features are never - treated as categorical, even when they are integers. You must explicitly - convert numeric columns containing categorical features to strings first. + feature value to its index in the feature vector. By default, numeric features + are not treated as categorical (even when they are integers). To treat them + as categorical, specify the relevant columns in `categoricalCols`. * String columns: For categorical features, the hash value of the string "column_name=value" @@ -740,6 +742,8 @@ class FeatureHasher(JavaTransformer, HasInputCols, HasOutputCol, HasNumFeatures, >>> hasher = FeatureHasher(inputCols=cols, outputCol="features") >>> hasher.transform(df).head().features SparseVector(262144, {51871: 1.0, 63643: 1.0, 174475: 2.0, 253195: 1.0}) + >>> hasher.setCategoricalCols(["real"]).transform(df).head().features + SparseVector(262144, {51871: 1.0, 63643: 1.0, 171257: 1.0, 253195: 1.0}) >>> hasherPath = temp_path + "/hasher" >>> hasher.save(hasherPath) >>> loadedHasher = FeatureHasher.load(hasherPath) @@ -751,10 +755,14 @@ class FeatureHasher(JavaTransformer, HasInputCols, HasOutputCol, HasNumFeatures, .. 
versionadded:: 2.3.0 """ + categoricalCols = Param(Params._dummy(), "categoricalCols", + "numeric columns to treat as categorical", + typeConverter=TypeConverters.toListString) + @keyword_only - def __init__(self, numFeatures=1 << 18, inputCols=None, outputCol=None): + def __init__(self, numFeatures=1 << 18, inputCols=None, outputCol=None, categoricalCols=None): """ - __init__(self, numFeatures=1 << 18, inputCols=None, outputCol=None) + __init__(self, numFeatures=1 << 18, inputCols=None, outputCol=None, categoricalCols=None) """ super(FeatureHasher, self).__init__() self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.FeatureHasher", self.uid) @@ -764,14 +772,28 @@ def __init__(self, numFeatures=1 << 18, inputCols=None, outputCol=None): @keyword_only @since("2.3.0") - def setParams(self, numFeatures=1 << 18, inputCols=None, outputCol=None): + def setParams(self, numFeatures=1 << 18, inputCols=None, outputCol=None, categoricalCols=None): """ - setParams(self, numFeatures=1 << 18, inputCols=None, outputCol=None) + setParams(self, numFeatures=1 << 18, inputCols=None, outputCol=None, categoricalCols=None) Sets params for this FeatureHasher. """ kwargs = self._input_kwargs return self._set(**kwargs) + @since("2.3.0") + def setCategoricalCols(self, value): + """ + Sets the value of :py:attr:`categoricalCols`. + """ + return self._set(categoricalCols=value) + + @since("2.3.0") + def getCategoricalCols(self): + """ + Gets the value of binary or its default value. + """ + return self.getOrDefault(self.categoricalCols) + @inherit_doc class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures, JavaMLReadable, @@ -1556,6 +1578,9 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, .. note:: This is different from scikit-learn's OneHotEncoder, which keeps all categories. The output vectors are sparse. + .. note:: Deprecated in 2.3.0. :py:class:`OneHotEncoderEstimator` will be renamed to + :py:class:`OneHotEncoder` and this :py:class:`OneHotEncoder` will be removed in 3.0.0. + .. seealso:: :py:class:`StringIndexer` for converting categorical values into @@ -1620,6 +1645,118 @@ def getDropLast(self): return self.getOrDefault(self.dropLast) +@inherit_doc +class OneHotEncoderEstimator(JavaEstimator, HasInputCols, HasOutputCols, HasHandleInvalid, + JavaMLReadable, JavaMLWritable): + """ + A one-hot encoder that maps a column of category indices to a column of binary vectors, with + at most a single one-value per row that indicates the input category index. + For example with 5 categories, an input value of 2.0 would map to an output vector of + `[0.0, 0.0, 1.0, 0.0]`. + The last category is not included by default (configurable via `dropLast`), + because it makes the vector entries sum up to one, and hence linearly dependent. + So an input value of 4.0 maps to `[0.0, 0.0, 0.0, 0.0]`. + + Note: This is different from scikit-learn's OneHotEncoder, which keeps all categories. + The output vectors are sparse. + + When `handleInvalid` is configured to 'keep', an extra "category" indicating invalid values is + added as last category. So when `dropLast` is true, invalid values are encoded as all-zeros + vector. + + Note: When encoding multi-column by using `inputCols` and `outputCols` params, input/output + cols come in pairs, specified by the order in the arrays, and each pair is treated + independently. 
+ + See `StringIndexer` for converting categorical values into category indices + + >>> from pyspark.ml.linalg import Vectors + >>> df = spark.createDataFrame([(0.0,), (1.0,), (2.0,)], ["input"]) + >>> ohe = OneHotEncoderEstimator(inputCols=["input"], outputCols=["output"]) + >>> model = ohe.fit(df) + >>> model.transform(df).head().output + SparseVector(2, {0: 1.0}) + >>> ohePath = temp_path + "/oheEstimator" + >>> ohe.save(ohePath) + >>> loadedOHE = OneHotEncoderEstimator.load(ohePath) + >>> loadedOHE.getInputCols() == ohe.getInputCols() + True + >>> modelPath = temp_path + "/ohe-model" + >>> model.save(modelPath) + >>> loadedModel = OneHotEncoderModel.load(modelPath) + >>> loadedModel.categorySizes == model.categorySizes + True + + .. versionadded:: 2.3.0 + """ + + handleInvalid = Param(Params._dummy(), "handleInvalid", "How to handle invalid data during " + + "transform(). Options are 'keep' (invalid data presented as an extra " + + "categorical feature) or error (throw an error). Note that this Param " + + "is only used during transform; during fitting, invalid data will " + + "result in an error.", + typeConverter=TypeConverters.toString) + + dropLast = Param(Params._dummy(), "dropLast", "whether to drop the last category", + typeConverter=TypeConverters.toBoolean) + + @keyword_only + def __init__(self, inputCols=None, outputCols=None, handleInvalid="error", dropLast=True): + """ + __init__(self, inputCols=None, outputCols=None, handleInvalid="error", dropLast=True) + """ + super(OneHotEncoderEstimator, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.feature.OneHotEncoderEstimator", self.uid) + self._setDefault(handleInvalid="error", dropLast=True) + kwargs = self._input_kwargs + self.setParams(**kwargs) + + @keyword_only + @since("2.3.0") + def setParams(self, inputCols=None, outputCols=None, handleInvalid="error", dropLast=True): + """ + setParams(self, inputCols=None, outputCols=None, handleInvalid="error", dropLast=True) + Sets params for this OneHotEncoderEstimator. + """ + kwargs = self._input_kwargs + return self._set(**kwargs) + + @since("2.3.0") + def setDropLast(self, value): + """ + Sets the value of :py:attr:`dropLast`. + """ + return self._set(dropLast=value) + + @since("2.3.0") + def getDropLast(self): + """ + Gets the value of dropLast or its default value. + """ + return self.getOrDefault(self.dropLast) + + def _create_model(self, java_model): + return OneHotEncoderModel(java_model) + + +class OneHotEncoderModel(JavaModel, JavaMLReadable, JavaMLWritable): + """ + Model fitted by :py:class:`OneHotEncoderEstimator`. + + .. versionadded:: 2.3.0 + """ + + @property + @since("2.3.0") + def categorySizes(self): + """ + Original number of categories for each feature being encoded. + The array contains one value for each input column, in order. + """ + return self._call_java("categorySizes") + + @inherit_doc class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): @@ -3466,6 +3603,84 @@ def selectedFeatures(self): return self._call_java("selectedFeatures") +@inherit_doc +class VectorSizeHint(JavaTransformer, HasInputCol, HasHandleInvalid, JavaMLReadable, + JavaMLWritable): + """ + .. note:: Experimental + + A feature transformer that adds size information to the metadata of a vector column. + VectorAssembler needs size information for its input columns and cannot be used on streaming + dataframes without this metadata. + + .. 
note:: VectorSizeHint modifies `inputCol` to include size metadata and does not have an + outputCol. + + >>> from pyspark.ml.linalg import Vectors + >>> from pyspark.ml import Pipeline, PipelineModel + >>> data = [(Vectors.dense([1., 2., 3.]), 4.)] + >>> df = spark.createDataFrame(data, ["vector", "float"]) + >>> + >>> sizeHint = VectorSizeHint(inputCol="vector", size=3, handleInvalid="skip") + >>> vecAssembler = VectorAssembler(inputCols=["vector", "float"], outputCol="assembled") + >>> pipeline = Pipeline(stages=[sizeHint, vecAssembler]) + >>> + >>> pipelineModel = pipeline.fit(df) + >>> pipelineModel.transform(df).head().assembled + DenseVector([1.0, 2.0, 3.0, 4.0]) + >>> vectorSizeHintPath = temp_path + "/vector-size-hint-pipeline" + >>> pipelineModel.save(vectorSizeHintPath) + >>> loadedPipeline = PipelineModel.load(vectorSizeHintPath) + >>> loaded = loadedPipeline.transform(df).head().assembled + >>> expected = pipelineModel.transform(df).head().assembled + >>> loaded == expected + True + + .. versionadded:: 2.3.0 + """ + + size = Param(Params._dummy(), "size", "Size of vectors in column.", + typeConverter=TypeConverters.toInt) + + handleInvalid = Param(Params._dummy(), "handleInvalid", + "How to handle invalid vectors in inputCol. Invalid vectors include " + "nulls and vectors with the wrong size. The options are `skip` (filter " + "out rows with invalid vectors), `error` (throw an error) and " + "`optimistic` (do not check the vector size, and keep all rows). " + "`error` by default.", + TypeConverters.toString) + + @keyword_only + def __init__(self, inputCol=None, size=None, handleInvalid="error"): + """ + __init__(self, inputCol=None, size=None, handleInvalid="error") + """ + super(VectorSizeHint, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.VectorSizeHint", self.uid) + self._setDefault(handleInvalid="error") + self.setParams(**self._input_kwargs) + + @keyword_only + @since("2.3.0") + def setParams(self, inputCol=None, size=None, handleInvalid="error"): + """ + setParams(self, inputCol=None, size=None, handleInvalid="error") + Sets params for this VectorSizeHint. + """ + kwargs = self._input_kwargs + return self._set(**kwargs) + + @since("2.3.0") + def getSize(self): + """ Gets size param, the size of vectors in `inputCol`.""" + self.getOrDefault(self.size) + + @since("2.3.0") + def setSize(self, value): + """ Sets size param, the size of vectors in `inputCol`.""" + self._set(size=value) + + if __name__ == "__main__": import doctest import tempfile diff --git a/python/pyspark/ml/image.py b/python/pyspark/ml/image.py index 7d14f05295572..c9b840276f675 100644 --- a/python/pyspark/ml/image.py +++ b/python/pyspark/ml/image.py @@ -108,12 +108,23 @@ def toNDArray(self, image): """ Converts an image to an array with metadata. - :param image: The image to be converted. + :param `Row` image: A row that contains the image to be converted. It should + have the attributes specified in `ImageSchema.imageSchema`. :return: a `numpy.ndarray` that is an image. .. versionadded:: 2.3.0 """ + if not isinstance(image, Row): + raise TypeError( + "image argument should be pyspark.sql.types.Row; however, " + "it got [%s]." % type(image)) + + if any(not hasattr(image, f) for f in self.imageFields): + raise ValueError( + "image argument should have attributes specified in " + "ImageSchema.imageSchema [%s]." 
% ", ".join(self.imageFields)) + height = image.height width = image.width nChannels = image.nChannels @@ -127,15 +138,20 @@ def toImage(self, array, origin=""): """ Converts an array with metadata to a two-dimensional image. - :param array array: The array to convert to image. + :param `numpy.ndarray` array: The array to convert to image. :param str origin: Path to the image, optional. :return: a :class:`Row` that is a two dimensional image. .. versionadded:: 2.3.0 """ + if not isinstance(array, np.ndarray): + raise TypeError( + "array argument should be numpy.ndarray; however, it got [%s]." % type(array)) + if array.ndim != 3: raise ValueError("Invalid array shape") + height, width, nChannels = array.shape ocvTypes = ImageSchema.ocvTypes if nChannels == 1: @@ -146,7 +162,12 @@ def toImage(self, array, origin=""): mode = ocvTypes["CV_8UC4"] else: raise ValueError("Invalid number of channels") - data = bytearray(array.astype(dtype=np.uint8).ravel()) + + # Running `bytearray(numpy.array([1]))` fails in specific Python versions + # with a specific Numpy version, for example in Python 3.6.0 and NumPy 1.13.3. + # Here, it avoids it by converting it to bytes. + data = bytearray(array.astype(dtype=np.uint8).ravel().tobytes()) + # Creating new Row with _create_row(), because Row(name = value, ... ) # orders fields by name, which conflicts with expected schema order # when the new DataFrame is created by UDF @@ -180,9 +201,8 @@ def readImages(self, path, recursive=False, numPartitions=-1, .. versionadded:: 2.3.0 """ - ctx = SparkContext._active_spark_context - spark = SparkSession(ctx) - image_schema = ctx._jvm.org.apache.spark.ml.image.ImageSchema + spark = SparkSession.builder.getOrCreate() + image_schema = spark._jvm.org.apache.spark.ml.image.ImageSchema jsession = spark._jsparkSession jresult = image_schema.readImages(path, jsession, recursive, numPartitions, dropImageFailures, float(sampleRatio), seed) @@ -192,7 +212,7 @@ def readImages(self, path, recursive=False, numPartitions=-1, ImageSchema = _ImageSchema() -# Monkey patch to disallow instantization of this class. +# Monkey patch to disallow instantiation of this class. def _disallow_instance(_): raise RuntimeError("Creating instance of _ImageSchema class is disallowed.") _ImageSchema.__init__ = _disallow_instance diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index 130d1a0bae7f0..db951d81de1e7 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -119,10 +119,12 @@ def get$Name(self): ("inputCol", "input column name.", None, "TypeConverters.toString"), ("inputCols", "input column names.", None, "TypeConverters.toListString"), ("outputCol", "output column name.", "self.uid + '__output'", "TypeConverters.toString"), + ("outputCols", "output column names.", None, "TypeConverters.toListString"), ("numFeatures", "number of features.", None, "TypeConverters.toInt"), ("checkpointInterval", "set checkpoint interval (>= 1) or disable checkpoint (-1). " + - "E.g. 10 means that the cache will get checkpointed every 10 iterations.", None, - "TypeConverters.toInt"), + "E.g. 10 means that the cache will get checkpointed every 10 iterations. 
Note: " + + "this setting will be ignored if the checkpoint directory is not set in the SparkContext.", + None, "TypeConverters.toInt"), ("seed", "random seed.", "hash(type(self).__name__)", "TypeConverters.toInt"), ("tol", "the convergence tolerance for iterative algorithms (>= 0).", None, "TypeConverters.toFloat"), @@ -154,7 +156,8 @@ def get$Name(self): ("aggregationDepth", "suggested depth for treeAggregate (>= 2).", "2", "TypeConverters.toInt"), ("parallelism", "the number of threads to use when running parallel algorithms (>= 1).", - "1", "TypeConverters.toInt")] + "1", "TypeConverters.toInt"), + ("loss", "the loss function to be optimized.", None, "TypeConverters.toString")] code = [] for name, doc, defaultValueStr, typeConverter in shared: diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index 4041d9c43b236..474c38764e5a1 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -256,6 +256,29 @@ def getOutputCol(self): return self.getOrDefault(self.outputCol) +class HasOutputCols(Params): + """ + Mixin for param outputCols: output column names. + """ + + outputCols = Param(Params._dummy(), "outputCols", "output column names.", typeConverter=TypeConverters.toListString) + + def __init__(self): + super(HasOutputCols, self).__init__() + + def setOutputCols(self, value): + """ + Sets the value of :py:attr:`outputCols`. + """ + return self._set(outputCols=value) + + def getOutputCols(self): + """ + Gets the value of outputCols or its default value. + """ + return self.getOrDefault(self.outputCols) + + class HasNumFeatures(Params): """ Mixin for param numFeatures: number of features. @@ -281,10 +304,10 @@ def getNumFeatures(self): class HasCheckpointInterval(Params): """ - Mixin for param checkpointInterval: set checkpoint interval (>= 1) or disable checkpoint (-1). E.g. 10 means that the cache will get checkpointed every 10 iterations. + Mixin for param checkpointInterval: set checkpoint interval (>= 1) or disable checkpoint (-1). E.g. 10 means that the cache will get checkpointed every 10 iterations. Note: this setting will be ignored if the checkpoint directory is not set in the SparkContext. """ - checkpointInterval = Param(Params._dummy(), "checkpointInterval", "set checkpoint interval (>= 1) or disable checkpoint (-1). E.g. 10 means that the cache will get checkpointed every 10 iterations.", typeConverter=TypeConverters.toInt) + checkpointInterval = Param(Params._dummy(), "checkpointInterval", "set checkpoint interval (>= 1) or disable checkpoint (-1). E.g. 10 means that the cache will get checkpointed every 10 iterations. Note: this setting will be ignored if the checkpoint directory is not set in the SparkContext.", typeConverter=TypeConverters.toInt) def __init__(self): super(HasCheckpointInterval, self).__init__() @@ -632,6 +655,29 @@ def getParallelism(self): return self.getOrDefault(self.parallelism) +class HasLoss(Params): + """ + Mixin for param loss: the loss function to be optimized. + """ + + loss = Param(Params._dummy(), "loss", "the loss function to be optimized.", typeConverter=TypeConverters.toString) + + def __init__(self): + super(HasLoss, self).__init__() + + def setLoss(self, value): + """ + Sets the value of :py:attr:`loss`. + """ + return self._set(loss=value) + + def getLoss(self): + """ + Gets the value of loss or its default value. + """ + return self.getOrDefault(self.loss) + + class DecisionTreeParams(Params): """ Mixin for Decision Tree parameters. 
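Every mixin emitted by _shared_params_code_gen.py has the same shape seen in HasOutputCols and HasLoss above: one class-level Param plus a getter/setter pair delegating to _set and getOrDefault. The Scala side has a matching generator (SharedParamsCodeGen); a sketch of what the generated counterpart of the new loss param looks like, with the trait body abbreviated and the trait name invented here:

    // Abbreviated sketch of the Scala counterpart generated for the shared
    // "loss" param; the real trait is org.apache.spark.ml.param.shared.HasLoss.
    import org.apache.spark.ml.param.{Param, Params}

    trait HasLossSketch extends Params {

      // Shared param definition: owner, name, and doc string.
      final val loss: Param[String] =
        new Param[String](this, "loss", "the loss function to be optimized.")

      // Generated getter; estimators mixing this in expose their own setters.
      final def getLoss: String = $(loss)
    }

An estimator then participates simply by extending the trait, exactly as the Python LinearRegression does with HasLoss in the regression.py change that follows.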
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 9d5b768091cf4..f0812bd1d4a39 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -39,23 +39,26 @@ @inherit_doc class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter, HasRegParam, HasTol, HasElasticNetParam, HasFitIntercept, - HasStandardization, HasSolver, HasWeightCol, HasAggregationDepth, + HasStandardization, HasSolver, HasWeightCol, HasAggregationDepth, HasLoss, JavaMLWritable, JavaMLReadable): """ Linear regression. - The learning objective is to minimize the squared error, with regularization. - The specific squared error loss function used is: L = 1/2n ||A coefficients - y||^2^ + The learning objective is to minimize the specified loss function, with regularization. + This supports two kinds of loss: - This supports multiple types of regularization: - - * none (a.k.a. ordinary least squares) + * squaredError (a.k.a squared loss) + * huber (a hybrid of squared error for relatively small errors and absolute error for \ + relatively large ones, and we estimate the scale parameter from training data) - * L2 (ridge regression) + This supports multiple types of regularization: - * L1 (Lasso) + * none (a.k.a. ordinary least squares) + * L2 (ridge regression) + * L1 (Lasso) + * L2 + L1 (elastic net) - * L2 + L1 (elastic net) + Note: Fitting with huber loss only supports none and L2 regularization. >>> from pyspark.ml.linalg import Vectors >>> df = spark.createDataFrame([ @@ -98,19 +101,28 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction solver = Param(Params._dummy(), "solver", "The solver algorithm for optimization. Supported " + "options: auto, normal, l-bfgs.", typeConverter=TypeConverters.toString) + loss = Param(Params._dummy(), "loss", "The loss function to be optimized. Supported " + + "options: squaredError, huber.", typeConverter=TypeConverters.toString) + + epsilon = Param(Params._dummy(), "epsilon", "The shape parameter to control the amount of " + + "robustness. Must be > 1.0. 
Only valid when loss is huber", + typeConverter=TypeConverters.toFloat) + @keyword_only def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, - standardization=True, solver="auto", weightCol=None, aggregationDepth=2): + standardization=True, solver="auto", weightCol=None, aggregationDepth=2, + loss="squaredError", epsilon=1.35): """ __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ - standardization=True, solver="auto", weightCol=None, aggregationDepth=2) + standardization=True, solver="auto", weightCol=None, aggregationDepth=2, \ + loss="squaredError", epsilon=1.35) """ super(LinearRegression, self).__init__() self._java_obj = self._new_java_obj( "org.apache.spark.ml.regression.LinearRegression", self.uid) - self._setDefault(maxIter=100, regParam=0.0, tol=1e-6) + self._setDefault(maxIter=100, regParam=0.0, tol=1e-6, loss="squaredError", epsilon=1.35) kwargs = self._input_kwargs self.setParams(**kwargs) @@ -118,11 +130,13 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred @since("1.4.0") def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, - standardization=True, solver="auto", weightCol=None, aggregationDepth=2): + standardization=True, solver="auto", weightCol=None, aggregationDepth=2, + loss="squaredError", epsilon=1.35): """ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ - standardization=True, solver="auto", weightCol=None, aggregationDepth=2) + standardization=True, solver="auto", weightCol=None, aggregationDepth=2, \ + loss="squaredError", epsilon=1.35) Sets params for linear regression. """ kwargs = self._input_kwargs @@ -131,6 +145,20 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre def _create_model(self, java_model): return LinearRegressionModel(java_model) + @since("2.3.0") + def setEpsilon(self, value): + """ + Sets the value of :py:attr:`epsilon`. + """ + return self._set(epsilon=value) + + @since("2.3.0") + def getEpsilon(self): + """ + Gets the value of epsilon or its default value. + """ + return self.getOrDefault(self.epsilon) + class LinearRegressionModel(JavaModel, JavaPredictionModel, JavaMLWritable, JavaMLReadable): """ @@ -155,6 +183,14 @@ def intercept(self): """ return self._call_java("intercept") + @property + @since("2.3.0") + def scale(self): + """ + The value by which \|y - X'w\| is scaled down when loss is "huber", otherwise 1.0. 
+ """ + return self._call_java("scale") + @property @since("2.0.0") def summary(self): diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 2258d61c95333..1af2b91da900d 100755 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -44,6 +44,7 @@ import numpy as np from numpy import abs, all, arange, array, array_equal, inf, ones, tile, zeros import inspect +import py4j from pyspark import keyword_only, SparkContext from pyspark.ml import Estimator, Model, Pipeline, PipelineModel, Transformer, UnaryTransformer @@ -67,11 +68,11 @@ from pyspark.ml.util import * from pyspark.ml.wrapper import JavaParams, JavaWrapper from pyspark.serializers import PickleSerializer -from pyspark.sql import DataFrame, Row, SparkSession +from pyspark.sql import DataFrame, Row, SparkSession, HiveContext from pyspark.sql.functions import rand from pyspark.sql.types import DoubleType, IntegerType from pyspark.storagelevel import * -from pyspark.tests import ReusedPySparkTestCase as PySparkTestCase +from pyspark.tests import QuietTest, ReusedPySparkTestCase as PySparkTestCase ser = PickleSerializer() @@ -1725,6 +1726,27 @@ def test_offset(self): self.assertTrue(np.isclose(model.intercept, -1.561613, atol=1E-4)) +class LinearRegressionTest(SparkSessionTestCase): + + def test_linear_regression_with_huber_loss(self): + + data_path = "data/mllib/sample_linear_regression_data.txt" + df = self.spark.read.format("libsvm").load(data_path) + + lir = LinearRegression(loss="huber", epsilon=2.0) + model = lir.fit(df) + + expectedCoefficients = [0.136, 0.7648, -0.7761, 2.4236, 0.537, + 1.2612, -0.333, -0.5694, -0.6311, 0.6053] + expectedIntercept = 0.1607 + expectedScale = 9.758 + + self.assertTrue( + np.allclose(model.coefficients.toArray(), expectedCoefficients, atol=1E-3)) + self.assertTrue(np.isclose(model.intercept, expectedIntercept, atol=1E-3)) + self.assertTrue(np.isclose(model.scale, expectedScale, atol=1E-3)) + + class LogisticRegressionTest(SparkSessionTestCase): def test_binomial_logistic_regression_with_bound(self): @@ -1836,6 +1858,56 @@ def test_read_images(self): self.assertEqual(ImageSchema.imageFields, expected) self.assertEqual(ImageSchema.undefinedImageType, "Undefined") + with QuietTest(self.sc): + self.assertRaisesRegexp( + TypeError, + "image argument should be pyspark.sql.types.Row; however", + lambda: ImageSchema.toNDArray("a")) + + with QuietTest(self.sc): + self.assertRaisesRegexp( + ValueError, + "image argument should have attributes specified in", + lambda: ImageSchema.toNDArray(Row(a=1))) + + with QuietTest(self.sc): + self.assertRaisesRegexp( + TypeError, + "array argument should be numpy.ndarray; however, it got", + lambda: ImageSchema.toImage("a")) + + +class ImageReaderTest2(PySparkTestCase): + + @classmethod + def setUpClass(cls): + super(ImageReaderTest2, cls).setUpClass() + # Note that here we enable Hive's support. 
+ cls.spark = None + try: + cls.sc._jvm.org.apache.hadoop.hive.conf.HiveConf() + except py4j.protocol.Py4JError: + cls.tearDownClass() + raise unittest.SkipTest("Hive is not available") + except TypeError: + cls.tearDownClass() + raise unittest.SkipTest("Hive is not available") + cls.spark = HiveContext._createForTesting(cls.sc) + + @classmethod + def tearDownClass(cls): + super(ImageReaderTest2, cls).tearDownClass() + if cls.spark is not None: + cls.spark.sparkSession.stop() + cls.spark = None + + def test_read_images_multiple_times(self): + # This test case checks whether `ImageSchema.readImages` tries to + # initialize the Hive client multiple times. See SPARK-22651. + data_path = 'data/mllib/images/kittens' + ImageSchema.readImages(data_path, recursive=True, dropImageFailures=True) + ImageSchema.readImages(data_path, recursive=True, dropImageFailures=True) + class ALSTest(SparkSessionTestCase): @@ -2308,6 +2380,21 @@ def test_unary_transformer_transform(self): self.assertEqual(res.input + shiftVal, res.output) +class EstimatorTest(unittest.TestCase): + + def testDefaultFitMultiple(self): + N = 4 + data = MockDataset() + estimator = MockEstimator() + params = [{estimator.fake: i} for i in range(N)] + modelIter = estimator.fitMultiple(data, params) + indexList = [] + for index, model in modelIter: + self.assertEqual(model.getFake(), index) + indexList.append(index) + self.assertEqual(sorted(indexList), list(range(N))) + + if __name__ == "__main__": from pyspark.ml.tests import * if xmlrunner: diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 47351133524e7..6c0cad6cbaaa1 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -31,6 +31,28 @@ 'TrainValidationSplitModel'] +def _parallelFitTasks(est, train, eva, validation, epm): + """ + Creates a list of callables which can be called from different threads to fit and evaluate + an estimator in parallel. Each callable returns an `(index, metric)` pair. + + :param est: Estimator, the estimator to be fit. + :param train: DataFrame, training data set, used for fitting. + :param eva: Evaluator, used to compute `metric`. + :param validation: DataFrame, validation data set, used for evaluation. + :param epm: Sequence of ParamMap, param maps to be used during fitting & evaluation. + :return: a list of callables, each returning `(int, float)`: an index into `epm` and the + associated metric value. + """ + modelIter = est.fitMultiple(train, epm) + + def singleTask(): + index, model = next(modelIter) + metric = eva.evaluate(model.transform(validation, epm[index])) + return index, metric + + return [singleTask] * len(epm) + + class ParamGridBuilder(object): r""" Builder for a param grid used in grid search-based model selection.
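The trick in `_parallelFitTasks` above is that all `len(epm)` copies of `singleTask` share one `fitMultiple` iterator, so however the thread pool interleaves them, each param map is fitted exactly once and the index travels with its metric. Below is a self-contained sketch of that pattern with stand-ins: `fake_fit_multiple` and the metric computation are hypothetical, and the explicit lock mimics the thread-safe iterator that `fitMultiple` is documented to return (a bare generator is not safe to share across threads):

    import threading
    from multiprocessing.pool import ThreadPool

    def fake_fit_multiple(params):
        # Stand-in for est.fitMultiple(train, epm): yields (index, model) pairs.
        for index, param_map in enumerate(params):
            yield index, {"fitted_with": param_map}

    def parallel_fit_tasks(params):
        model_iter = fake_fit_multiple(params)
        lock = threading.Lock()

        def single_task():
            with lock:
                index, model = next(model_iter)
            # Stand-in for eva.evaluate(model.transform(validation, epm[index]))
            metric = float(index)
            return index, metric

        # N copies of one callable: each call pulls the next model off the shared iterator.
        return [single_task] * len(params)

    params = [{"regParam": r} for r in (0.0, 0.1, 0.2)]
    metrics = [None] * len(params)
    pool = ThreadPool(processes=2)
    for index, metric in pool.imap_unordered(lambda f: f(), parallel_fit_tasks(params)):
        metrics[index] = metric
    pool.close()
    print(metrics)  # [0.0, 1.0, 2.0]

This is also why the CrossValidator change further down can switch from `pool.map` to `pool.imap_unordered`: results may arrive out of order, but each carries the index needed to slot its metric into place.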
@@ -266,15 +288,9 @@ def _fit(self, dataset): validation = df.filter(condition).cache() train = df.filter(~condition).cache() - def singleTrain(paramMap): - model = est.fit(train, paramMap) - # TODO: duplicate evaluator to take extra params from input - metric = eva.evaluate(model.transform(validation, paramMap)) - return metric - - currentFoldMetrics = pool.map(singleTrain, epm) - for j in range(numModels): - metrics[j] += (currentFoldMetrics[j] / nFolds) + tasks = _parallelFitTasks(est, train, eva, validation, epm) + for j, metric in pool.imap_unordered(lambda f: f(), tasks): + metrics[j] += (metric / nFolds) validation.unpersist() train.unpersist() @@ -523,13 +539,11 @@ def _fit(self, dataset): validation = df.filter(condition).cache() train = df.filter(~condition).cache() - def singleTrain(paramMap): - model = est.fit(train, paramMap) - metric = eva.evaluate(model.transform(validation, paramMap)) - return metric - + tasks = _parallelFitTasks(est, train, eva, validation, epm) pool = ThreadPool(processes=min(self.getParallelism(), numModels)) - metrics = pool.map(singleTrain, epm) + metrics = [None] * numModels + for j, metric in pool.imap_unordered(lambda f: f(), tasks): + metrics[j] = metric train.unpersist() validation.unpersist() diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py index 37e7cf3fa662e..88d6a191babca 100644 --- a/python/pyspark/serializers.py +++ b/python/pyspark/serializers.py @@ -223,27 +223,14 @@ def _create_batch(series, timezone): series = [series] series = ((s, None) if not isinstance(s, (list, tuple)) else s for s in series) - # If a nullable integer series has been promoted to floating point with NaNs, need to cast - # NOTE: this is not necessary with Arrow >= 0.7 - def cast_series(s, t): - if type(t) == pa.TimestampType: - # NOTE: convert to 'us' with astype here, unit ignored in `from_pandas` see ARROW-1680 - return _check_series_convert_timestamps_internal(s.fillna(0), timezone)\ - .values.astype('datetime64[us]', copy=False) - # NOTE: can not compare None with pyarrow.DataType(), fixed with Arrow >= 0.7.1 - elif t is not None and t == pa.date32(): - # TODO: this converts the series to Python objects, possibly avoid with Arrow >= 0.8 - return s.dt.date - elif t is None or s.dtype == t.to_pandas_dtype(): - return s - else: - return s.fillna(0).astype(t.to_pandas_dtype(), copy=False) - - # Some object types don't support masks in Arrow, see ARROW-1721 def create_array(s, t): - casted = cast_series(s, t) - mask = None if casted.dtype == 'object' else s.isnull() - return pa.Array.from_pandas(casted, mask=mask, type=t) + mask = s.isnull() + # Ensure timestamp series are in expected form for Spark internal representation + if t is not None and pa.types.is_timestamp(t): + s = _check_series_convert_timestamps_internal(s.fillna(0), timezone) + # TODO: need cast after Arrow conversion, ns values cause error with pandas 0.19.2 + return pa.Array.from_pandas(s, mask=mask).cast(t, safe=False) + return pa.Array.from_pandas(s, mask=mask, type=t) arrs = [create_array(s, t) for s, t in series] return pa.RecordBatch.from_arrays(arrs, ["_%d" % i for i in xrange(len(arrs))]) diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py index 659bc65701a0c..156603128d063 100644 --- a/python/pyspark/sql/catalog.py +++ b/python/pyspark/sql/catalog.py @@ -227,15 +227,15 @@ def dropGlobalTempView(self, viewName): @ignore_unicode_prefix @since(2.0) def registerFunction(self, name, f, returnType=StringType()): - """Registers a python function 
(including lambda function) as a UDF - so it can be used in SQL statements. + """Registers a Python function (including lambda function) or a :class:`UserDefinedFunction` + as a UDF. The registered UDF can be used in SQL statements. In addition to a name and the function itself, the return type can be optionally specified. When the return type is not given, it defaults to a string and conversion will automatically be done. For any other return type, the produced object must match the specified type. :param name: name of the UDF - :param f: python function + :param f: a Python function, or a wrapped/native UserDefinedFunction :param returnType: a :class:`pyspark.sql.types.DataType` object :return: a wrapped :class:`UserDefinedFunction` @@ -255,9 +255,26 @@ def registerFunction(self, name, f, returnType=StringType()): >>> _ = spark.udf.register("stringLengthInt", len, IntegerType()) >>> spark.sql("SELECT stringLengthInt('test')").collect() [Row(stringLengthInt(test)=4)] + + >>> import random + >>> from pyspark.sql.functions import udf + >>> from pyspark.sql.types import IntegerType, StringType + >>> random_udf = udf(lambda: random.randint(0, 100), IntegerType()).asNondeterministic() + >>> newRandom_udf = spark.catalog.registerFunction("random_udf", random_udf, StringType()) + >>> spark.sql("SELECT random_udf()").collect() # doctest: +SKIP + [Row(random_udf()=u'82')] + >>> spark.range(1).select(newRandom_udf()).collect() # doctest: +SKIP + [Row(random_udf()=u'62')] """ - udf = UserDefinedFunction(f, returnType=returnType, name=name, - evalType=PythonEvalType.SQL_BATCHED_UDF) + + # Check whether the input function is a wrapped/native UserDefinedFunction + if hasattr(f, 'asNondeterministic'): + udf = UserDefinedFunction(f.func, returnType=returnType, name=name, + evalType=PythonEvalType.SQL_BATCHED_UDF, + deterministic=f.deterministic) + else: + udf = UserDefinedFunction(f, returnType=returnType, name=name, + evalType=PythonEvalType.SQL_BATCHED_UDF) self._jsparkSession.udf().registerPython(name, udf._judf) return udf._wrapped() diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index b1e723cdecef3..b8d86cc098e94 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -175,15 +175,15 @@ def range(self, start, end=None, step=1, numPartitions=None): @ignore_unicode_prefix @since(1.2) def registerFunction(self, name, f, returnType=StringType()): - """Registers a python function (including lambda function) as a UDF - so it can be used in SQL statements. + """Registers a Python function (including lambda function) or a :class:`UserDefinedFunction` + as a UDF. The registered UDF can be used in SQL statements. In addition to a name and the function itself, the return type can be optionally specified. When the return type is not given, it defaults to a string and conversion will automatically be done. For any other return type, the produced object must match the specified type.
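The `hasattr(f, 'asNondeterministic')` branch above is what lets an already-wrapped `UserDefinedFunction` keep its `deterministic` flag when it is registered under a SQL name; a plain Python function still takes the old path. A minimal usage sketch, assuming a build that includes this patch (the output values vary by construction):

    import random
    from pyspark.sql import SparkSession
    from pyspark.sql.functions import udf
    from pyspark.sql.types import IntegerType

    spark = SparkSession.builder.getOrCreate()

    # A wrapped, nondeterministic UDF; registration takes the new branch,
    # so deterministic=False is preserved on the SQL-registered copy.
    rand_udf = udf(lambda: random.randint(0, 100), IntegerType()).asNondeterministic()
    registered = spark.catalog.registerFunction("rand_udf", rand_udf, IntegerType())

    spark.sql("SELECT rand_udf()").show()       # callable from SQL
    spark.range(1).select(registered()).show()  # and from the DataFrame API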
:param name: name of the UDF - :param f: python function + :param f: a Python function, or a wrapped/native UserDefinedFunction :param returnType: a :class:`pyspark.sql.types.DataType` object :return: a wrapped :class:`UserDefinedFunction` @@ -203,6 +203,16 @@ def registerFunction(self, name, f, returnType=StringType()): >>> _ = sqlContext.udf.register("stringLengthInt", lambda x: len(x), IntegerType()) >>> sqlContext.sql("SELECT stringLengthInt('test')").collect() [Row(stringLengthInt(test)=4)] + + >>> import random + >>> from pyspark.sql.functions import udf + >>> from pyspark.sql.types import IntegerType, StringType + >>> random_udf = udf(lambda: random.randint(0, 100), IntegerType()).asNondeterministic() + >>> newRandom_udf = sqlContext.registerFunction("random_udf", random_udf, StringType()) + >>> sqlContext.sql("SELECT random_udf()").collect() # doctest: +SKIP + [Row(random_udf()=u'82')] + >>> sqlContext.range(1).select(newRandom_udf()).collect() # doctest: +SKIP + [Row(random_udf()=u'62')] """ return self.sparkSession.catalog.registerFunction(name, f, returnType) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 9864dc98c1f33..95eca76fa9888 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -368,6 +368,20 @@ def checkpoint(self, eager=True): jdf = self._jdf.checkpoint(eager) return DataFrame(jdf, self.sql_ctx) + @since(2.3) + def localCheckpoint(self, eager=True): + """Returns a locally checkpointed version of this Dataset. Checkpointing can be used to + truncate the logical plan of this DataFrame, which is especially useful in iterative + algorithms where the plan may grow exponentially. Local checkpoints are stored in the + executors using the caching subsystem and therefore they are not reliable. + + :param eager: Whether to checkpoint this DataFrame immediately + + .. note:: Experimental + """ + jdf = self._jdf.localCheckpoint(eager) + return DataFrame(jdf, self.sql_ctx) + @since(2.1) def withWatermark(self, eventTime, delayThreshold): """Defines an event time watermark for this :class:`DataFrame`. A watermark tracks a point @@ -1892,7 +1906,9 @@ def toPandas(self): if self.sql_ctx.getConf("spark.sql.execution.arrow.enabled", "false").lower() == "true": try: from pyspark.sql.types import _check_dataframe_localize_timestamps + from pyspark.sql.utils import require_minimum_pyarrow_version import pyarrow + require_minimum_pyarrow_version() tables = self._collectAsArrow() if tables: table = pyarrow.concat_tables(tables) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 4e0faddb1c0df..f7b3f29764040 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -1099,7 +1099,7 @@ def trunc(date, format): """ Returns date truncated to the unit specified by the format. - :param format: 'year', 'YYYY', 'yy' or 'month', 'mon', 'mm' + :param format: 'year', 'yyyy', 'yy' or 'month', 'mon', 'mm' >>> df = spark.createDataFrame([('1997-02-28',)], ['d']) >>> df.select(trunc(df.d, 'year').alias('year')).collect() @@ -1111,6 +1111,24 @@ def trunc(date, format): return Column(sc._jvm.functions.trunc(_to_java_column(date), format)) +@since(2.3) +def date_trunc(format, timestamp): + """ + Returns timestamp truncated to the unit specified by the format. 
+ + :param format: 'year', 'yyyy', 'yy', 'month', 'mon', 'mm', + 'day', 'dd', 'hour', 'minute', 'second', 'week', 'quarter' + + >>> df = spark.createDataFrame([('1997-02-28 05:02:11',)], ['t']) + >>> df.select(date_trunc('year', df.t).alias('year')).collect() + [Row(year=datetime.datetime(1997, 1, 1, 0, 0))] + >>> df.select(date_trunc('mon', df.t).alias('month')).collect() + [Row(month=datetime.datetime(1997, 2, 1, 0, 0))] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.date_trunc(format, _to_java_column(timestamp))) + + @since(1.5) def next_day(date, dayOfWeek): """ @@ -1356,7 +1374,8 @@ def hash(*cols): @ignore_unicode_prefix def concat(*cols): """ - Concatenates multiple input string columns together into a single string column. + Concatenates multiple input columns together into a single column. + If all inputs are binary, concat returns the output as binary. Otherwise, it returns it as a string. >>> df = spark.createDataFrame([('abcd','123')], ['s', 'd']) >>> df.select(concat(df.s, df.d).alias('s')).collect() @@ -1830,14 +1849,14 @@ def explode_outer(col): +---+----------+----+-----+ >>> df.select("id", "a_map", explode_outer("an_array")).show() - +---+-------------+----+ - | id| a_map| col| - +---+-------------+----+ - | 1|Map(x -> 1.0)| foo| - | 1|Map(x -> 1.0)| bar| - | 2| Map()|null| - | 3| null|null| - +---+-------------+----+ + +---+----------+----+ + | id| a_map| col| + +---+----------+----+ + | 1|[x -> 1.0]| foo| + | 1|[x -> 1.0]| bar| + | 2| []|null| + | 3| null|null| + +---+----------+----+ """ sc = SparkContext._active_spark_context jc = sc._jvm.functions.explode_outer(_to_java_column(col)) @@ -1862,14 +1881,14 @@ def posexplode_outer(col): | 3| null|null|null| null| +---+----------+----+----+-----+ >>> df.select("id", "a_map", posexplode_outer("an_array")).show() - +---+-------------+----+----+ - | id| a_map| pos| col| - +---+-------------+----+----+ - | 1|Map(x -> 1.0)| 0| foo| - | 1|Map(x -> 1.0)| 1| bar| - | 2| Map()|null|null| - | 3| null|null|null| - +---+-------------+----+----+ + +---+----------+----+----+ + | id| a_map| pos| col| + +---+----------+----+----+ + | 1|[x -> 1.0]| 0| foo| + | 1|[x -> 1.0]| 1| bar| + | 2| []|null|null| + | 3| null|null|null| + +---+----------+----+----+ """ sc = SparkContext._active_spark_context jc = sc._jvm.functions.posexplode_outer(_to_java_column(col)) @@ -2075,9 +2094,14 @@ class PandasUDFType(object): def udf(f=None, returnType=StringType()): """Creates a user defined function (UDF). - .. note:: The user-defined functions must be deterministic. Due to optimization, - duplicate invocations may be eliminated or the function may even be invoked more times than - it is present in the query. + .. note:: The user-defined functions are considered deterministic by default. Due to + optimization, duplicate invocations may be eliminated or the function may even be invoked + more times than it is present in the query. If your function is not deterministic, call + `asNondeterministic` on the user-defined function. E.g.: + + >>> from pyspark.sql.types import IntegerType + >>> import random + >>> random_udf = udf(lambda: int(random.random() * 100), IntegerType()).asNondeterministic() .. note:: The user-defined functions do not support conditional expressions or short circuiting in boolean expressions, and all expressions end up being evaluated internally.
If the functions @@ -2141,16 +2165,17 @@ def pandas_udf(f=None, returnType=None, functionType=None): >>> from pyspark.sql.functions import pandas_udf, PandasUDFType >>> from pyspark.sql.types import IntegerType, StringType - >>> slen = pandas_udf(lambda s: s.str.len(), IntegerType()) - >>> @pandas_udf(StringType()) + >>> slen = pandas_udf(lambda s: s.str.len(), IntegerType()) # doctest: +SKIP + >>> @pandas_udf(StringType()) # doctest: +SKIP ... def to_upper(s): ... return s.str.upper() ... - >>> @pandas_udf("integer", PandasUDFType.SCALAR) + >>> @pandas_udf("integer", PandasUDFType.SCALAR) # doctest: +SKIP ... def add_one(x): ... return x + 1 ... - >>> df = spark.createDataFrame([(1, "John Doe", 21)], ("id", "name", "age")) + >>> df = spark.createDataFrame([(1, "John Doe", 21)], + ... ("id", "name", "age")) # doctest: +SKIP >>> df.select(slen("name").alias("slen(name)"), to_upper("name"), add_one("age")) \\ ... .show() # doctest: +SKIP +----------+--------------+------------+ @@ -2159,6 +2184,11 @@ def pandas_udf(f=None, returnType=None, functionType=None): | 8| JOHN DOE| 22| +----------+--------------+------------+ + .. note:: The length of `pandas.Series` within a scalar UDF is not that of the whole input + column, but is the length of an internal batch used for each call to the function. + Therefore, it can be used, for example, to ensure the length of each returned + `pandas.Series`, but it cannot be used as the column length. + 2. GROUP_MAP A group map UDF defines transformation: A `pandas.DataFrame` -> A `pandas.DataFrame` @@ -2171,8 +2201,8 @@ def pandas_udf(f=None, returnType=None, functionType=None): >>> from pyspark.sql.functions import pandas_udf, PandasUDFType >>> df = spark.createDataFrame( ... [(1, 1.0), (1, 2.0), (2, 3.0), (2, 5.0), (2, 10.0)], - ... ("id", "v")) - >>> @pandas_udf("id long, v double", PandasUDFType.GROUP_MAP) + ... ("id", "v")) # doctest: +SKIP + >>> @pandas_udf("id long, v double", PandasUDFType.GROUP_MAP) # doctest: +SKIP ... def normalize(pdf): ... v = pdf.v ... return pdf.assign(v=(v - v.mean()) / v.std()) @@ -2189,7 +2219,17 @@ def pandas_udf(f=None, returnType=None, functionType=None): .. seealso:: :meth:`pyspark.sql.GroupedData.apply` - .. note:: The user-defined function must be deterministic. + .. note:: The user-defined functions are considered deterministic by default. Due to + optimization, duplicate invocations may be eliminated or the function may even be invoked + more times than it is present in the query. If your function is not deterministic, call + `asNondeterministic` on the user-defined function. E.g.: + + >>> @pandas_udf('double', PandasUDFType.SCALAR) # doctest: +SKIP + ... def random(v): + ... import numpy as np + ... import pandas as pd + ... return pd.Series(np.random.randn(len(v))) + >>> random = random.asNondeterministic() # doctest: +SKIP .. note:: The user-defined functions do not support conditional expressions or short circuiting in boolean expressions, and all expressions end up being evaluated internally. If the functions diff --git a/python/pyspark/sql/group.py b/python/pyspark/sql/group.py index 4d47dd6a3e878..09fae46adf014 100644 --- a/python/pyspark/sql/group.py +++ b/python/pyspark/sql/group.py @@ -218,7 +218,7 @@ def apply(self, udf): >>> df = spark.createDataFrame( ... [(1, 1.0), (1, 2.0), (2, 3.0), (2, 5.0), (2, 10.0)], ... ("id", "v")) - >>> @pandas_udf("id long, v double", PandasUDFType.GROUP_MAP) + >>> @pandas_udf("id long, v double", PandasUDFType.GROUP_MAP) # doctest: +SKIP ... def normalize(pdf): ... v = pdf.v ...
return pdf.assign(v=(v - v.mean()) / v.std()) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 1ad974e9aa4c7..49af1bcee5ef8 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -304,7 +304,7 @@ def parquet(self, *paths): @ignore_unicode_prefix @since(1.6) - def text(self, paths): + def text(self, paths, wholetext=False): """ Loads text files and returns a :class:`DataFrame` whose schema starts with a string column named "value", and followed by partitioned columns if there @@ -313,11 +313,16 @@ def text(self, paths): Each line in the text file is a new row in the resulting DataFrame. :param paths: string, or list of strings, for input path(s). + :param wholetext: if true, read each file from input path(s) as a single row. >>> df = spark.read.text('python/test_support/sql/text-test.txt') >>> df.collect() [Row(value=u'hello'), Row(value=u'this')] + >>> df = spark.read.text('python/test_support/sql/text-test.txt', wholetext=True) + >>> df.collect() + [Row(value=u'hello\\nthis')] """ + self._set_opts(wholetext=wholetext) if isinstance(paths, basestring): paths = [paths] return self._df(self._jreader.text(self._spark._sc._jvm.PythonUtils.toSeq(paths))) @@ -328,7 +333,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non ignoreTrailingWhiteSpace=None, nullValue=None, nanValue=None, positiveInf=None, negativeInf=None, dateFormat=None, timestampFormat=None, maxColumns=None, maxCharsPerColumn=None, maxMalformedLogPerPartition=None, mode=None, - columnNameOfCorruptRecord=None, multiLine=None): + columnNameOfCorruptRecord=None, multiLine=None, charToEscapeQuoteEscaping=None): """Loads a CSV file and returns the result as a :class:`DataFrame`. This function will go through the input once to determine the input schema if @@ -339,17 +344,17 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non or RDD of Strings storing CSV rows. :param schema: an optional :class:`pyspark.sql.types.StructType` for the input schema or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). - :param sep: sets the single character as a separator for each field and value. + :param sep: sets a single character as a separator for each field and value. If None is set, it uses the default value, ``,``. :param encoding: decodes the CSV files by the given encoding type. If None is set, it uses the default value, ``UTF-8``. - :param quote: sets the single character used for escaping quoted values where the + :param quote: sets a single character used for escaping quoted values where the separator can be part of the value. If None is set, it uses the default value, ``"``. If you would like to turn off quotations, you need to set an empty string. - :param escape: sets the single character used for escaping quotes inside an already + :param escape: sets a single character used for escaping quotes inside an already quoted value. If None is set, it uses the default value, ``\``. - :param comment: sets the single character used for skipping lines beginning with this + :param comment: sets a single character used for skipping lines beginning with this character. By default (None), it is disabled. :param header: uses the first line as names of columns. If None is set, it uses the default value, ``false``. @@ -405,6 +410,10 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non ``spark.sql.columnNameOfCorruptRecord``. 
:param multiLine: parse records, which may span multiple lines. If None is set, it uses the default value, ``false``. + :param charToEscapeQuoteEscaping: sets a single character used for escaping the escape for + the quote character. If None is set, the default value is + the escape character when escape and quote characters are + different, ``\0`` otherwise. >>> df = spark.read.csv('python/test_support/sql/ages.csv') >>> df.dtypes @@ -422,7 +431,8 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non dateFormat=dateFormat, timestampFormat=timestampFormat, maxColumns=maxColumns, maxCharsPerColumn=maxCharsPerColumn, maxMalformedLogPerPartition=maxMalformedLogPerPartition, mode=mode, - columnNameOfCorruptRecord=columnNameOfCorruptRecord, multiLine=multiLine) + columnNameOfCorruptRecord=columnNameOfCorruptRecord, multiLine=multiLine, + charToEscapeQuoteEscaping=charToEscapeQuoteEscaping) if isinstance(path, basestring): path = [path] if type(path) == list: @@ -809,7 +819,8 @@ def text(self, path, compression=None): @since(2.0) def csv(self, path, mode=None, compression=None, sep=None, quote=None, escape=None, header=None, nullValue=None, escapeQuotes=None, quoteAll=None, dateFormat=None, - timestampFormat=None, ignoreLeadingWhiteSpace=None, ignoreTrailingWhiteSpace=None): + timestampFormat=None, ignoreLeadingWhiteSpace=None, ignoreTrailingWhiteSpace=None, + charToEscapeQuoteEscaping=None): """Saves the content of the :class:`DataFrame` in CSV format at the specified path. :param path: the path in any Hadoop supported file system @@ -824,12 +835,12 @@ def csv(self, path, mode=None, compression=None, sep=None, quote=None, escape=No :param compression: compression codec to use when saving to file. This can be one of the known case-insensitive shortened names (none, bzip2, gzip, lz4, snappy and deflate). - :param sep: sets the single character as a separator for each field and value. If None is + :param sep: sets a single character as a separator for each field and value. If None is set, it uses the default value, ``,``. - :param quote: sets the single character used for escaping quoted values where the + :param quote: sets a single character used for escaping quoted values where the separator can be part of the value. If None is set, it uses the default value, ``"``. If an empty string is set, it uses ``u0000`` (null character). - :param escape: sets the single character used for escaping quotes inside an already + :param escape: sets a single character used for escaping quotes inside an already quoted value. If None is set, it uses the default value, ``\`` :param escapeQuotes: a flag indicating whether values containing quotes should always be enclosed in quotes. If None is set, it uses the default value @@ -855,6 +866,10 @@ def csv(self, path, mode=None, compression=None, sep=None, quote=None, escape=No :param ignoreTrailingWhiteSpace: a flag indicating whether or not trailing whitespaces from values being written should be skipped. If None is set, it uses the default value, ``true``. + :param charToEscapeQuoteEscaping: sets a single character used for escaping the escape for + the quote character. If None is set, the default value is + the escape character when escape and quote characters are + different, ``\0`` otherwise.
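Since `charToEscapeQuoteEscaping` now appears on both the CSV reader and the writer, a round trip needs the same setting on both sides. A hedged sketch of that, assuming a build with this patch (the output path is hypothetical):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([(1, 'He said "hi"')], ["id", "msg"])

    path = "/tmp/csv_escape_demo"  # hypothetical output directory
    # Use '\' to escape quotes, and '\' again to escape the escape character itself.
    df.write.csv(path, mode="overwrite", quote='"', escape="\\",
                 charToEscapeQuoteEscaping="\\")
    back = spark.read.csv(path, quote='"', escape="\\",
                          charToEscapeQuoteEscaping="\\")
    back.show(truncate=False)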
>>> df.write.csv(os.path.join(tempfile.mkdtemp(), 'data')) """ @@ -863,7 +878,8 @@ def csv(self, path, mode=None, compression=None, sep=None, quote=None, escape=No nullValue=nullValue, escapeQuotes=escapeQuotes, quoteAll=quoteAll, dateFormat=dateFormat, timestampFormat=timestampFormat, ignoreLeadingWhiteSpace=ignoreLeadingWhiteSpace, - ignoreTrailingWhiteSpace=ignoreTrailingWhiteSpace) + ignoreTrailingWhiteSpace=ignoreTrailingWhiteSpace, + charToEscapeQuoteEscaping=charToEscapeQuoteEscaping) self._jwrite.csv(path) @since(1.5) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index e2435e09af23d..604021c1f45cc 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -325,11 +325,12 @@ def range(self, start, end=None, step=1, numPartitions=None): return DataFrame(jdf, self._wrapped) - def _inferSchemaFromList(self, data): + def _inferSchemaFromList(self, data, names=None): """ Infer schema from list of Row or tuple. :param data: list of Row or tuple + :param names: list of column names :return: :class:`pyspark.sql.types.StructType` """ if not data: @@ -338,12 +339,12 @@ def _inferSchemaFromList(self, data): if type(first) is dict: warnings.warn("inferring schema from dict is deprecated," "please use pyspark.sql.Row instead") - schema = reduce(_merge_type, map(_infer_schema, data)) + schema = reduce(_merge_type, (_infer_schema(row, names) for row in data)) if _has_nulltype(schema): raise ValueError("Some of types cannot be determined after inferring") return schema - def _inferSchema(self, rdd, samplingRatio=None): + def _inferSchema(self, rdd, samplingRatio=None, names=None): """ Infer schema from an RDD of Row or tuple. @@ -360,10 +361,10 @@ def _inferSchema(self, rdd, samplingRatio=None): "Use pyspark.sql.Row instead") if samplingRatio is None: - schema = _infer_schema(first) + schema = _infer_schema(first, names=names) if _has_nulltype(schema): for row in rdd.take(100)[1:]: - schema = _merge_type(schema, _infer_schema(row)) + schema = _merge_type(schema, _infer_schema(row, names=names)) if not _has_nulltype(schema): break else: @@ -372,7 +373,7 @@ def _inferSchema(self, rdd, samplingRatio=None): else: if samplingRatio < 0.99: rdd = rdd.sample(False, float(samplingRatio)) - schema = rdd.map(_infer_schema).reduce(_merge_type) + schema = rdd.map(lambda row: _infer_schema(row, names)).reduce(_merge_type) return schema def _createFromRDD(self, rdd, schema, samplingRatio): @@ -380,7 +381,7 @@ def _createFromRDD(self, rdd, schema, samplingRatio): Create an RDD for DataFrame from an existing RDD, returns the RDD and schema. """ if schema is None or isinstance(schema, (list, tuple)): - struct = self._inferSchema(rdd, samplingRatio) + struct = self._inferSchema(rdd, samplingRatio, names=schema) converter = _create_converter(struct) rdd = rdd.map(converter) if isinstance(schema, (list, tuple)): @@ -406,7 +407,7 @@ def _createFromLocal(self, data, schema): data = list(data) if schema is None or isinstance(schema, (list, tuple)): - struct = self._inferSchemaFromList(data) + struct = self._inferSchemaFromList(data, names=schema) converter = _create_converter(struct) data = map(converter, data) if isinstance(schema, (list, tuple)): @@ -458,21 +459,23 @@ def _convert_from_pandas(self, pdf, schema, timezone): # TODO: handle nested timestamps, such as ArrayType(TimestampType())? 
if isinstance(field.dataType, TimestampType): s = _check_series_convert_timestamps_tz_local(pdf[field.name], timezone) - if not copied and s is not pdf[field.name]: - # Copy once if the series is modified to prevent the original Pandas - # DataFrame from being updated - pdf = pdf.copy() - copied = True - pdf[field.name] = s + if s is not pdf[field.name]: + if not copied: + # Copy once if the series is modified to prevent the original + # Pandas DataFrame from being updated + pdf = pdf.copy() + copied = True + pdf[field.name] = s else: for column, series in pdf.iteritems(): - s = _check_series_convert_timestamps_tz_local(pdf[column], timezone) - if not copied and s is not pdf[column]: - # Copy once if the series is modified to prevent the original Pandas - # DataFrame from being updated - pdf = pdf.copy() - copied = True - pdf[column] = s + s = _check_series_convert_timestamps_tz_local(series, timezone) + if s is not series: + if not copied: + # Copy once if the series is modified to prevent the original + # Pandas DataFrame from being updated + pdf = pdf.copy() + copied = True + pdf[column] = s # Convert pandas.DataFrame to list of numpy records np_records = pdf.to_records(index=False) @@ -493,12 +496,14 @@ def _create_from_pandas_with_arrow(self, pdf, schema, timezone): data types will be used to coerce the data in Pandas to Arrow conversion. """ from pyspark.serializers import ArrowSerializer, _create_batch - from pyspark.sql.types import from_arrow_schema, to_arrow_type, \ - _old_pandas_exception_message, TimestampType - try: - from pandas.api.types import is_datetime64_dtype, is_datetime64tz_dtype - except ImportError as e: - raise ImportError(_old_pandas_exception_message(e)) + from pyspark.sql.types import from_arrow_schema, to_arrow_type, TimestampType + from pyspark.sql.utils import require_minimum_pandas_version, \ + require_minimum_pyarrow_version + + require_minimum_pandas_version() + require_minimum_pyarrow_version() + + from pandas.api.types import is_datetime64_dtype, is_datetime64tz_dtype # Determine arrow types to coerce data when creating batches if isinstance(schema, StructType): @@ -643,7 +648,9 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr # If no schema supplied by user then get the names of columns only if schema is None: - schema = [x.encode('utf-8') if not isinstance(x, str) else x for x in data.columns] + schema = [str(x) if not isinstance(x, basestring) else + (x.encode('utf-8') if not isinstance(x, str) else x) + for x in data.columns] if self.conf.get("spark.sql.execution.arrow.enabled", "false").lower() == "true" \ and len(data) > 0: diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index 0cf702143c773..24ae3776a217b 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -490,6 +490,23 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, else: raise TypeError("path can be only a single string") + @since(2.3) + def orc(self, path): + """Loads an ORC file stream, returning the result as a :class:`DataFrame`. + + .. note:: Evolving.
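The reworked loop in `_convert_from_pandas` above follows a copy-on-first-write pattern: convert each column, and only if some column actually changed, copy the frame once before assigning, so the caller's DataFrame is never mutated. A distilled sketch of the pattern with a stand-in converter (hypothetical; the real code converts timezone-aware timestamp columns):

    import pandas as pd

    def convert(series):
        # Stand-in for _check_series_convert_timestamps_tz_local: return the input
        # unchanged when nothing needs converting, else return a new Series.
        return series * 2 if series.dtype.kind == "i" else series

    def convert_columns(pdf):
        copied = False
        for column in pdf.columns:
            s = convert(pdf[column])
            if s is not pdf[column]:   # this column was actually converted
                if not copied:
                    # Copy at most once, so the original frame stays untouched
                    pdf = pdf.copy()
                    copied = True
                pdf[column] = s
        return pdf

    original = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})
    result = convert_columns(original)
    print(original["a"].tolist(), result["a"].tolist())  # [1, 2] [2, 4]

The identity check (`is not`) rather than an equality check is the point of the idiom: a converter that returns its input unchanged signals "no work needed" without any data comparison.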
+ + >>> orc_sdf = spark.readStream.schema(sdf_schema).orc(tempfile.mkdtemp()) + >>> orc_sdf.isStreaming + True + >>> orc_sdf.schema == sdf_schema + True + """ + if isinstance(path, basestring): + return self._df(self._jreader.orc(path)) + else: + raise TypeError("path can be only a single string") + @since(2.0) def parquet(self, path): """Loads a Parquet file stream, returning the result as a :class:`DataFrame`. @@ -543,7 +560,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non ignoreTrailingWhiteSpace=None, nullValue=None, nanValue=None, positiveInf=None, negativeInf=None, dateFormat=None, timestampFormat=None, maxColumns=None, maxCharsPerColumn=None, maxMalformedLogPerPartition=None, mode=None, - columnNameOfCorruptRecord=None, multiLine=None): + columnNameOfCorruptRecord=None, multiLine=None, charToEscapeQuoteEscaping=None): """Loads a CSV file stream and returns the result as a :class:`DataFrame`. This function will go through the input once to determine the input schema if @@ -555,17 +572,17 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non :param path: string, or list of strings, for input path(s). :param schema: an optional :class:`pyspark.sql.types.StructType` for the input schema or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). - :param sep: sets the single character as a separator for each field and value. + :param sep: sets a single character as a separator for each field and value. If None is set, it uses the default value, ``,``. :param encoding: decodes the CSV files by the given encoding type. If None is set, it uses the default value, ``UTF-8``. - :param quote: sets the single character used for escaping quoted values where the + :param quote: sets a single character used for escaping quoted values where the separator can be part of the value. If None is set, it uses the default value, ``"``. If you would like to turn off quotations, you need to set an empty string. - :param escape: sets the single character used for escaping quotes inside an already + :param escape: sets a single character used for escaping quotes inside an already quoted value. If None is set, it uses the default value, ``\``. - :param comment: sets the single character used for skipping lines beginning with this + :param comment: sets a single character used for skipping lines beginning with this character. By default (None), it is disabled. :param header: uses the first line as names of columns. If None is set, it uses the default value, ``false``. @@ -621,6 +638,10 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non ``spark.sql.columnNameOfCorruptRecord``. :param multiLine: parse one record, which may span multiple lines. If None is set, it uses the default value, ``false``. + :param charToEscapeQuoteEscaping: sets a single character used for escaping the escape for + the quote character. If None is set, the default value is + the escape character when escape and quote characters are + different, ``\0`` otherwise.
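With the new `orc` method above, a streaming query can tail a directory of ORC files; a schema must be supplied up front, since streaming file sources do not infer one by default. A hedged end-to-end sketch, assuming a build with this patch (the input directory is hypothetical, and the trigger options are the ones documented in this patch):

    from pyspark.sql import SparkSession
    from pyspark.sql.types import LongType, StringType, StructType

    spark = SparkSession.builder.getOrCreate()
    schema = StructType().add("id", LongType()).add("value", StringType())

    stream = spark.readStream.schema(schema).orc("/tmp/orc_input")  # hypothetical directory
    assert stream.isStreaming

    query = (stream.writeStream
             .format("console")
             .outputMode("append")
             .trigger(processingTime="5 seconds")  # or trigger(once=True) for a single batch
             .start())
    query.awaitTermination(30)  # run for up to 30 seconds in this sketch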
>>> csv_sdf = spark.readStream.csv(tempfile.mkdtemp(), schema = sdf_schema) >>> csv_sdf.isStreaming @@ -636,7 +657,8 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non dateFormat=dateFormat, timestampFormat=timestampFormat, maxColumns=maxColumns, maxCharsPerColumn=maxCharsPerColumn, maxMalformedLogPerPartition=maxMalformedLogPerPartition, mode=mode, - columnNameOfCorruptRecord=columnNameOfCorruptRecord, multiLine=multiLine) + columnNameOfCorruptRecord=columnNameOfCorruptRecord, multiLine=multiLine, + charToEscapeQuoteEscaping=charToEscapeQuoteEscaping) if isinstance(path, basestring): return self._df(self._jreader.csv(path)) else: @@ -771,6 +793,10 @@ def trigger(self, processingTime=None, once=None): .. note:: Evolving. :param processingTime: a processing time interval as a string, e.g. '5 seconds', '1 minute'. + Set a trigger that runs a query periodically based on the processing + time. Only one trigger can be set. + :param once: if set to True, set a trigger that processes only one batch of data in a + streaming query then terminates the query. Only one trigger can be set. >>> # trigger the query for execution every 5 seconds >>> writer = sdf.writeStream.trigger(processingTime='5 seconds') diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index b4d32d8de8a22..80a94a91a87b3 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -53,7 +53,8 @@ try: import pandas try: - import pandas.api + from pyspark.sql.utils import require_minimum_pandas_version + require_minimum_pandas_version() _have_pandas = True except: _have_old_pandas = True @@ -67,6 +68,7 @@ from pyspark.sql.types import UserDefinedType, _infer_type, _make_type_verifier from pyspark.sql.types import _array_signed_int_typecode_ctype_mappings, _array_type_mappings from pyspark.sql.types import _array_unsigned_int_typecode_ctype_mappings +from pyspark.sql.types import _merge_type from pyspark.tests import QuietTest, ReusedPySparkTestCase, SparkSubmitTests from pyspark.sql.functions import UserDefinedFunction, sha2, lit from pyspark.sql.window import Window @@ -377,6 +379,55 @@ def test_udf2(self): [res] = self.spark.sql("SELECT strlen(a) FROM test WHERE strlen(a) > 1").collect() self.assertEqual(4, res[0]) + def test_udf3(self): + twoargs = self.spark.catalog.registerFunction( + "twoArgs", UserDefinedFunction(lambda x, y: len(x) + y), IntegerType()) + self.assertEqual(twoargs.deterministic, True) + [row] = self.spark.sql("SELECT twoArgs('test', 1)").collect() + self.assertEqual(row[0], 5) + + def test_nondeterministic_udf(self): + # Test that nondeterministic UDFs are evaluated only once in chained UDF evaluations + from pyspark.sql.functions import udf + import random + udf_random_col = udf(lambda: int(100 * random.random()), IntegerType()).asNondeterministic() + self.assertEqual(udf_random_col.deterministic, False) + df = self.spark.createDataFrame([Row(1)]).select(udf_random_col().alias('RAND')) + udf_add_ten = udf(lambda rand: rand + 10, IntegerType()) + [row] = df.withColumn('RAND_PLUS_TEN', udf_add_ten('RAND')).collect() + self.assertEqual(row[0] + 10, row[1]) + + def test_nondeterministic_udf2(self): + import random + from pyspark.sql.functions import udf + random_udf = udf(lambda: random.randint(6, 6), IntegerType()).asNondeterministic() + self.assertEqual(random_udf.deterministic, False) + random_udf1 = self.spark.catalog.registerFunction("randInt", random_udf, StringType()) + self.assertEqual(random_udf1.deterministic, False) + [row] = 
self.spark.sql("SELECT randInt()").collect() + self.assertEqual(row[0], "6") + [row] = self.spark.range(1).select(random_udf1()).collect() + self.assertEqual(row[0], "6") + [row] = self.spark.range(1).select(random_udf()).collect() + self.assertEqual(row[0], 6) + # render_doc() reproduces the help() exception without printing output + pydoc.render_doc(udf(lambda: random.randint(6, 6), IntegerType())) + pydoc.render_doc(random_udf) + pydoc.render_doc(random_udf1) + pydoc.render_doc(udf(lambda x: x).asNondeterministic) + + def test_nondeterministic_udf_in_aggregate(self): + from pyspark.sql.functions import udf, sum + import random + udf_random_col = udf(lambda: int(100 * random.random()), 'int').asNondeterministic() + df = self.spark.range(10) + + with QuietTest(self.sc): + with self.assertRaisesRegexp(AnalysisException, "nondeterministic"): + df.groupby('id').agg(sum(udf_random_col())).collect() + with self.assertRaisesRegexp(AnalysisException, "nondeterministic"): + df.agg(sum(udf_random_col())).collect() + def test_chained_udf(self): self.spark.catalog.registerFunction("double", lambda x: x + x, IntegerType()) [row] = self.spark.sql("SELECT double(1)").collect() @@ -557,7 +608,6 @@ def test_read_multiple_orc_file(self): def test_udf_with_input_file_name(self): from pyspark.sql.functions import udf, input_file_name - from pyspark.sql.types import StringType sourceFile = udf(lambda path: path, StringType()) filePath = "python/test_support/sql/people1.json" row = self.spark.read.json(filePath).select(sourceFile(input_file_name())).first() @@ -565,7 +615,6 @@ def test_udf_with_input_file_name(self): def test_udf_with_input_file_name_for_hadooprdd(self): from pyspark.sql.functions import udf, input_file_name - from pyspark.sql.types import StringType def filename(path): return path @@ -625,7 +674,6 @@ def test_udf_with_string_return_type(self): def test_udf_shouldnt_accept_noncallable_object(self): from pyspark.sql.functions import UserDefinedFunction - from pyspark.sql.types import StringType non_callable = None self.assertRaises(TypeError, UserDefinedFunction, non_callable, StringType()) @@ -851,6 +899,15 @@ def test_infer_schema(self): result = self.spark.sql("SELECT l[0].a from test2 where d['key'].d = '2'") self.assertEqual(1, result.head()[0]) + def test_infer_schema_not_enough_names(self): + df = self.spark.createDataFrame([["a", "b"]], ["col1"]) + self.assertEqual(df.columns, ['col1', '_2']) + + def test_infer_schema_fails(self): + with self.assertRaisesRegexp(TypeError, 'field a'): + self.spark.createDataFrame(self.spark.sparkContext.parallelize([[1, 1], ["x", 1]]), + schema=["a", "b"], samplingRatio=0.99) + def test_infer_nested_schema(self): NestedRow = Row("f1", "f2") nestedRdd1 = self.sc.parallelize([NestedRow([1, 2], {"row1": 1.0}), @@ -871,6 +928,10 @@ def test_infer_nested_schema(self): df = self.spark.createDataFrame(rdd) self.assertEqual(Row(field1=1, field2=u'row1'), df.first()) + def test_create_dataframe_from_dict_respects_schema(self): + df = self.spark.createDataFrame([{'a': 1}], ["b"]) + self.assertEqual(df.columns, ['b']) + def test_create_dataframe_from_objects(self): data = [MyObject(1, "1"), MyObject(2, "2")] df = self.spark.createDataFrame(data) @@ -1289,7 +1350,6 @@ def test_between_function(self): df.filter(df.a.between(df.b, df.c)).collect()) def test_struct_type(self): - from pyspark.sql.types import StructType, StringType, StructField struct1 = StructType().add("f1", StringType(), True).add("f2", StringType(), True, None) struct2 = 
StructType([StructField("f1", StringType(), True), StructField("f2", StringType(), True, None)]) @@ -1358,7 +1418,6 @@ def test_parse_datatype_string(self): _parse_datatype_string("a INT, c DOUBLE")) def test_metadata_null(self): - from pyspark.sql.types import StructType, StringType, StructField schema = StructType([StructField("f1", StringType(), True, None), StructField("f2", StringType(), True, {'a': None})]) rdd = self.sc.parallelize([["a", "b"], ["c", "d"]]) @@ -1727,6 +1786,92 @@ def test_infer_long_type(self): self.assertEqual(_infer_type(2**61), LongType()) self.assertEqual(_infer_type(2**71), LongType()) + def test_merge_type(self): + self.assertEqual(_merge_type(LongType(), NullType()), LongType()) + self.assertEqual(_merge_type(NullType(), LongType()), LongType()) + + self.assertEqual(_merge_type(LongType(), LongType()), LongType()) + + self.assertEqual(_merge_type( + ArrayType(LongType()), + ArrayType(LongType()) + ), ArrayType(LongType())) + with self.assertRaisesRegexp(TypeError, 'element in array'): + _merge_type(ArrayType(LongType()), ArrayType(DoubleType())) + + self.assertEqual(_merge_type( + MapType(StringType(), LongType()), + MapType(StringType(), LongType()) + ), MapType(StringType(), LongType())) + with self.assertRaisesRegexp(TypeError, 'key of map'): + _merge_type( + MapType(StringType(), LongType()), + MapType(DoubleType(), LongType())) + with self.assertRaisesRegexp(TypeError, 'value of map'): + _merge_type( + MapType(StringType(), LongType()), + MapType(StringType(), DoubleType())) + + self.assertEqual(_merge_type( + StructType([StructField("f1", LongType()), StructField("f2", StringType())]), + StructType([StructField("f1", LongType()), StructField("f2", StringType())]) + ), StructType([StructField("f1", LongType()), StructField("f2", StringType())])) + with self.assertRaisesRegexp(TypeError, 'field f1'): + _merge_type( + StructType([StructField("f1", LongType()), StructField("f2", StringType())]), + StructType([StructField("f1", DoubleType()), StructField("f2", StringType())])) + + self.assertEqual(_merge_type( + StructType([StructField("f1", StructType([StructField("f2", LongType())]))]), + StructType([StructField("f1", StructType([StructField("f2", LongType())]))]) + ), StructType([StructField("f1", StructType([StructField("f2", LongType())]))])) + with self.assertRaisesRegexp(TypeError, 'field f2 in field f1'): + _merge_type( + StructType([StructField("f1", StructType([StructField("f2", LongType())]))]), + StructType([StructField("f1", StructType([StructField("f2", StringType())]))])) + + self.assertEqual(_merge_type( + StructType([StructField("f1", ArrayType(LongType())), StructField("f2", StringType())]), + StructType([StructField("f1", ArrayType(LongType())), StructField("f2", StringType())]) + ), StructType([StructField("f1", ArrayType(LongType())), StructField("f2", StringType())])) + with self.assertRaisesRegexp(TypeError, 'element in array field f1'): + _merge_type( + StructType([ + StructField("f1", ArrayType(LongType())), + StructField("f2", StringType())]), + StructType([ + StructField("f1", ArrayType(DoubleType())), + StructField("f2", StringType())])) + + self.assertEqual(_merge_type( + StructType([ + StructField("f1", MapType(StringType(), LongType())), + StructField("f2", StringType())]), + StructType([ + StructField("f1", MapType(StringType(), LongType())), + StructField("f2", StringType())]) + ), StructType([ + StructField("f1", MapType(StringType(), LongType())), + StructField("f2", StringType())])) + with 
     def test_filter_with_datetime(self):
         time = datetime.datetime(2015, 4, 17, 23, 1, 2, 3000)
         date = time.date()
@@ -2600,7 +2745,7 @@ def test_to_pandas(self):
     @unittest.skipIf(not _have_old_pandas, "Old Pandas not installed")
     def test_to_pandas_old(self):
         with QuietTest(self.sc):
-            with self.assertRaisesRegexp(ImportError, 'Pandas \(.*\) must be installed'):
+            with self.assertRaisesRegexp(ImportError, 'Pandas >= .* must be installed'):
                 self._to_pandas()
 
     @unittest.skipIf(not _have_pandas, "Pandas not installed")
@@ -2643,7 +2788,7 @@ def test_create_dataframe_from_old_pandas(self):
         pdf = pd.DataFrame({"ts": [datetime(2017, 10, 31, 1, 1, 1)],
                             "d": [pd.Timestamp.now().date()]})
         with QuietTest(self.sc):
-            with self.assertRaisesRegexp(ImportError, 'Pandas \(.*\) must be installed'):
+            with self.assertRaisesRegexp(ImportError, 'Pandas >= .* must be installed'):
                 self.spark.createDataFrame(pdf)
 
@@ -3141,6 +3286,7 @@ class ArrowTests(ReusedSQLTestCase):
     @classmethod
     def setUpClass(cls):
         from datetime import datetime
+        from decimal import Decimal
         ReusedSQLTestCase.setUpClass()
 
         # Synchronize default timezone between Python and Java
@@ -3157,11 +3303,15 @@ def setUpClass(cls):
             StructField("3_long_t", LongType(), True),
             StructField("4_float_t", FloatType(), True),
             StructField("5_double_t", DoubleType(), True),
-            StructField("6_date_t", DateType(), True),
-            StructField("7_timestamp_t", TimestampType(), True)])
-        cls.data = [(u"a", 1, 10, 0.2, 2.0, datetime(1969, 1, 1), datetime(1969, 1, 1, 1, 1, 1)),
-                    (u"b", 2, 20, 0.4, 4.0, datetime(2012, 2, 2), datetime(2012, 2, 2, 2, 2, 2)),
-                    (u"c", 3, 30, 0.8, 6.0, datetime(2100, 3, 3), datetime(2100, 3, 3, 3, 3, 3))]
+            StructField("6_decimal_t", DecimalType(38, 18), True),
+            StructField("7_date_t", DateType(), True),
+            StructField("8_timestamp_t", TimestampType(), True)])
+        cls.data = [(u"a", 1, 10, 0.2, 2.0, Decimal("2.0"),
+                     datetime(1969, 1, 1), datetime(1969, 1, 1, 1, 1, 1)),
+                    (u"b", 2, 20, 0.4, 4.0, Decimal("4.0"),
+                     datetime(2012, 2, 2), datetime(2012, 2, 2, 2, 2, 2)),
+                    (u"c", 3, 30, 0.8, 6.0, Decimal("6.0"),
+                     datetime(2100, 3, 3), datetime(2100, 3, 3, 3, 3, 3))]
 
     @classmethod
     def tearDownClass(cls):
@@ -3189,10 +3339,11 @@ def create_pandas_data_frame(self):
         return pd.DataFrame(data=data_dict)
 
     def test_unsupported_datatype(self):
-        schema = StructType([StructField("decimal", DecimalType(), True)])
+        schema = StructType([StructField("map", MapType(StringType(), IntegerType()), True)])
         df = self.spark.createDataFrame([(None,)], schema=schema)
         with QuietTest(self.sc):
-            self.assertRaises(Exception, lambda: df.toPandas())
+            with self.assertRaisesRegexp(Exception, 'Unsupported data type'):
+                df.toPandas()
 
     def test_null_conversion(self):
         df_null = self.spark.createDataFrame([tuple([None for _ in range(len(self.data[0]))])] +
@@ -3292,7 +3443,7 @@ def test_createDataFrame_respect_session_timezone(self):
         self.assertNotEqual(result_ny, result_la)
 
         # Correct result_la by adjusting 3 hours difference between Los Angeles and New York
-        result_la_corrected = [Row(**{k: v - timedelta(hours=3) if k == '7_timestamp_t' else v
+        result_la_corrected = [Row(**{k: v - timedelta(hours=3) if k == '8_timestamp_t' else v
                                       for k, v in row.asDict().items()}) for row in result_la]
         self.assertEqual(result_ny, result_la_corrected)
 
@@ -3316,11 +3467,11 @@ def test_createDataFrame_with_incorrect_schema(self):
 
     def test_createDataFrame_with_names(self):
         pdf = self.create_pandas_data_frame()
         # Test that schema as a list of column names gets applied
-        df = self.spark.createDataFrame(pdf, schema=list('abcdefg'))
-        self.assertEquals(df.schema.fieldNames(), list('abcdefg'))
+        df = self.spark.createDataFrame(pdf, schema=list('abcdefgh'))
+        self.assertEquals(df.schema.fieldNames(), list('abcdefgh'))
         # Test that schema as tuple of column names gets applied
-        df = self.spark.createDataFrame(pdf, schema=tuple('abcdefg'))
-        self.assertEquals(df.schema.fieldNames(), list('abcdefg'))
+        df = self.spark.createDataFrame(pdf, schema=tuple('abcdefgh'))
+        self.assertEquals(df.schema.fieldNames(), list('abcdefgh'))
 
     def test_createDataFrame_column_name_encoding(self):
         import pandas as pd
@@ -3339,10 +3490,11 @@ def test_createDataFrame_with_single_data_type(self):
             self.spark.createDataFrame(pd.DataFrame({"a": [1]}), schema="int")
 
     def test_createDataFrame_does_not_modify_input(self):
+        import pandas as pd
         # Some series get converted for Spark to consume, this makes sure input is unchanged
         pdf = self.create_pandas_data_frame()
         # Use a nanosecond value to make sure it is not truncated
-        pdf.ix[0, '7_timestamp_t'] = 1
+        pdf.ix[0, '8_timestamp_t'] = pd.Timestamp(1)
         # Integers with nulls will get NaNs filled with 0 and will be casted
         pdf.ix[1, '2_int_t'] = None
         pdf_copy = pdf.copy(deep=True)
@@ -3355,7 +3507,42 @@ def test_schema_conversion_roundtrip(self):
         schema_rt = from_arrow_schema(arrow_schema)
         self.assertEquals(self.schema, schema_rt)
 
+    def test_createDataFrame_with_array_type(self):
+        import pandas as pd
+        pdf = pd.DataFrame({"a": [[1, 2], [3, 4]], "b": [[u"x", u"y"], [u"y", u"z"]]})
+        df, df_arrow = self._createDataFrame_toggle(pdf)
+        result = df.collect()
+        result_arrow = df_arrow.collect()
+        expected = [tuple(list(e) for e in rec) for rec in pdf.to_records(index=False)]
+        for r in range(len(expected)):
+            for e in range(len(expected[r])):
+                self.assertTrue(expected[r][e] == result_arrow[r][e] and
+                                result[r][e] == result_arrow[r][e])
+
+    def test_toPandas_with_array_type(self):
+        expected = [([1, 2], [u"x", u"y"]), ([3, 4], [u"y", u"z"])]
+        array_schema = StructType([StructField("a", ArrayType(IntegerType())),
+                                   StructField("b", ArrayType(StringType()))])
+        df = self.spark.createDataFrame(expected, schema=array_schema)
+        pdf, pdf_arrow = self._toPandas_arrow_toggle(df)
+        result = [tuple(list(e) for e in rec) for rec in pdf.to_records(index=False)]
+        result_arrow = [tuple(list(e) for e in rec) for rec in pdf_arrow.to_records(index=False)]
+        for r in range(len(expected)):
+            for e in range(len(expected[r])):
+                self.assertTrue(expected[r][e] == result_arrow[r][e] and
+                                result[r][e] == result_arrow[r][e])
+
+    def test_createDataFrame_with_int_col_names(self):
+        import numpy as np
+        import pandas as pd
+        pdf = pd.DataFrame(np.random.rand(4, 2))
+        df, df_arrow = self._createDataFrame_toggle(pdf)
+        pdf_col_names = [str(c) for c in pdf.columns]
+        self.assertEqual(pdf_col_names, df.columns)
+        self.assertEqual(pdf_col_names, df_arrow.columns)
+
 
+@unittest.skipIf(not _have_pandas or not _have_arrow, "Pandas or Arrow not installed")
 class PandasUDFTests(ReusedSQLTestCase):
     def test_pandas_udf_basic(self):
         from pyspark.rdd import PythonEvalType
@@ -3503,6 +3690,18 @@ def tearDownClass(cls):
             time.tzset()
         ReusedSQLTestCase.tearDownClass()
 
+    @property
+    def random_udf(self):
+        from pyspark.sql.functions import pandas_udf
+
+        @pandas_udf('double')
+        def random_udf(v):
+            import pandas as pd
+            import numpy as np
+            return pd.Series(np.random.random(len(v)))
+        random_udf = random_udf.asNondeterministic()
+        return random_udf
+
     def test_vectorized_udf_basic(self):
         from pyspark.sql.functions import pandas_udf, col
         df = self.spark.range(10).select(
@@ -3511,6 +3710,7 @@ def test_vectorized_udf_basic(self):
             col('id').alias('long'),
             col('id').cast('float').alias('float'),
             col('id').cast('double').alias('double'),
+            col('id').cast('decimal').alias('decimal'),
             col('id').cast('boolean').alias('bool'))
         f = lambda x: x
         str_f = pandas_udf(f, StringType())
@@ -3518,10 +3718,12 @@ def test_vectorized_udf_basic(self):
         long_f = pandas_udf(f, LongType())
         float_f = pandas_udf(f, FloatType())
         double_f = pandas_udf(f, DoubleType())
+        decimal_f = pandas_udf(f, DecimalType())
         bool_f = pandas_udf(f, BooleanType())
         res = df.select(str_f(col('str')), int_f(col('int')),
                         long_f(col('long')), float_f(col('float')),
-                        double_f(col('double')), bool_f(col('bool')))
+                        double_f(col('double')), decimal_f('decimal'),
+                        bool_f(col('bool')))
         self.assertEquals(df.collect(), res.collect())
 
     def test_vectorized_udf_null_boolean(self):
@@ -3587,6 +3789,16 @@ def test_vectorized_udf_null_double(self):
         res = df.select(double_f(col('double')))
         self.assertEquals(df.collect(), res.collect())
 
+    def test_vectorized_udf_null_decimal(self):
+        from decimal import Decimal
+        from pyspark.sql.functions import pandas_udf, col
+        data = [(Decimal(3.0),), (Decimal(5.0),), (Decimal(-1.0),), (None,)]
+        schema = StructType().add("decimal", DecimalType(38, 18))
+        df = self.spark.createDataFrame(data, schema)
+        decimal_f = pandas_udf(lambda x: x, DecimalType(38, 18))
+        res = df.select(decimal_f(col('decimal')))
+        self.assertEquals(df.collect(), res.collect())
+
     def test_vectorized_udf_null_string(self):
         from pyspark.sql.functions import pandas_udf, col
         data = [("foo",), (None,), ("bar",), ("bar",)]
@@ -3604,6 +3816,7 @@ def test_vectorized_udf_datatype_string(self):
             col('id').alias('long'),
             col('id').cast('float').alias('float'),
             col('id').cast('double').alias('double'),
+            col('id').cast('decimal').alias('decimal'),
             col('id').cast('boolean').alias('bool'))
         f = lambda x: x
         str_f = pandas_udf(f, 'string')
@@ -3611,12 +3824,32 @@ def test_vectorized_udf_datatype_string(self):
         long_f = pandas_udf(f, 'long')
         float_f = pandas_udf(f, 'float')
         double_f = pandas_udf(f, 'double')
+        decimal_f = pandas_udf(f, 'decimal(38, 18)')
         bool_f = pandas_udf(f, 'boolean')
         res = df.select(str_f(col('str')), int_f(col('int')),
                         long_f(col('long')), float_f(col('float')),
-                        double_f(col('double')), bool_f(col('bool')))
+                        double_f(col('double')), decimal_f('decimal'),
+                        bool_f(col('bool')))
         self.assertEquals(df.collect(), res.collect())
 
+    def test_vectorized_udf_array_type(self):
+        from pyspark.sql.functions import pandas_udf, col
+        data = [([1, 2],), ([3, 4],)]
+        array_schema = StructType([StructField("array", ArrayType(IntegerType()))])
+        df = self.spark.createDataFrame(data, schema=array_schema)
+        array_f = pandas_udf(lambda x: x, ArrayType(IntegerType()))
+        result = df.select(array_f(col('array')))
+        self.assertEquals(df.collect(), result.collect())
+
+    def test_vectorized_udf_null_array(self):
+        from pyspark.sql.functions import pandas_udf, col
+        data = [([1, 2],), (None,), (None,), ([3, 4],), (None,)]
+        array_schema = StructType([StructField("array", ArrayType(IntegerType()))])
+        df = self.spark.createDataFrame(data, schema=array_schema)
+        array_f = pandas_udf(lambda x: x, ArrayType(IntegerType()))
+        result = df.select(array_f(col('array')))
+        self.assertEquals(df.collect(), result.collect())
+
     def test_vectorized_udf_complex(self):
         from pyspark.sql.functions import pandas_udf, col, expr
         df = self.spark.range(10).select(
@@ -3671,9 +3904,9 @@ def test_vectorized_udf_chained(self):
     def test_vectorized_udf_wrong_return_type(self):
         from pyspark.sql.functions import pandas_udf, col
         df = self.spark.range(10)
-        f = pandas_udf(lambda x: x * 1.0, StringType())
+        f = pandas_udf(lambda x: x * 1.0, MapType(LongType(), LongType()))
         with QuietTest(self.sc):
-            with self.assertRaisesRegexp(Exception, 'Invalid.*type'):
+            with self.assertRaisesRegexp(Exception, 'Unsupported.*type.*conversion'):
                 df.select(f(col('id'))).collect()
 
     def test_vectorized_udf_return_scalar(self):
@@ -3710,12 +3943,12 @@ def test_vectorized_udf_varargs(self):
 
     def test_vectorized_udf_unsupported_types(self):
         from pyspark.sql.functions import pandas_udf, col
-        schema = StructType([StructField("dt", DecimalType(), True)])
+        schema = StructType([StructField("map", MapType(StringType(), IntegerType()), True)])
         df = self.spark.createDataFrame([(None,)], schema=schema)
-        f = pandas_udf(lambda x: x, DecimalType())
+        f = pandas_udf(lambda x: x, MapType(StringType(), IntegerType()))
         with QuietTest(self.sc):
             with self.assertRaisesRegexp(Exception, 'Unsupported data type'):
-                df.select(f(col('dt'))).collect()
+                df.select(f(col('map'))).collect()
 
     def test_vectorized_udf_null_date(self):
         from pyspark.sql.functions import pandas_udf, col
@@ -3791,6 +4024,7 @@ def gen_timestamps(id):
 
     def test_vectorized_udf_check_config(self):
         from pyspark.sql.functions import pandas_udf, col
+        import pandas as pd
         orig_value = self.spark.conf.get("spark.sql.execution.arrow.maxRecordsPerBatch", None)
         self.spark.conf.set("spark.sql.execution.arrow.maxRecordsPerBatch", 3)
         try:
@@ -3798,11 +4032,11 @@ def test_vectorized_udf_check_config(self):
 
             @pandas_udf(returnType=LongType())
             def check_records_per_batch(x):
-                self.assertTrue(x.size <= 3)
-                return x
+                return pd.Series(x.size).repeat(x.size)
 
-            result = df.select(check_records_per_batch(col("id")))
-            self.assertEqual(df.collect(), result.collect())
+            result = df.select(check_records_per_batch(col("id"))).collect()
+            for (r,) in result:
+                self.assertTrue(r <= 3)
         finally:
             if orig_value is None:
                 self.spark.conf.unset("spark.sql.execution.arrow.maxRecordsPerBatch")
@@ -3851,6 +4085,33 @@ def test_vectorized_udf_timestamps_respect_session_timezone(self):
         finally:
             self.spark.conf.set("spark.sql.session.timeZone", orig_tz)
 
+    def test_nondeterministic_udf(self):
+        # Test that nondeterministic UDFs are evaluated only once in chained UDF evaluations
+        from pyspark.sql.functions import udf, pandas_udf, col
+
+        @pandas_udf('double')
+        def plus_ten(v):
+            return v + 10
+        random_udf = self.random_udf
+
+        df = self.spark.range(10).withColumn('rand', random_udf(col('id')))
+        result1 = df.withColumn('plus_ten(rand)', plus_ten(df['rand'])).toPandas()
+
+        self.assertEqual(random_udf.deterministic, False)
+        self.assertTrue(result1['plus_ten(rand)'].equals(result1['rand'] + 10))
+
+    def test_nondeterministic_udf_in_aggregate(self):
+        from pyspark.sql.functions import pandas_udf, sum
+
+        df = self.spark.range(10)
+        random_udf = self.random_udf
+
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(AnalysisException, 'nondeterministic'):
+                df.groupby(df.id).agg(sum(random_udf(df.id))).collect()
+            with self.assertRaisesRegexp(AnalysisException, 'nondeterministic'):
+                df.agg(sum(random_udf(df.id))).collect()
+
 
 @unittest.skipIf(not _have_pandas or not _have_arrow, "Pandas or Arrow not installed")
 class GroupbyApplyTests(ReusedSQLTestCase):
@@ -3974,12 +4235,12 @@ def test_wrong_return_type(self):
 
         foo = pandas_udf(
             lambda pdf: pdf,
-            'id long, v string',
+            'id long, v map<int, int>',
             PandasUDFType.GROUP_MAP
         )
 
         with QuietTest(self.sc):
-            with self.assertRaisesRegexp(Exception, 'Invalid.*type'):
+            with self.assertRaisesRegexp(Exception, 'Unsupported.*type.*conversion'):
                 df.groupby('id').apply(foo).sort('id').toPandas()
 
     def test_wrong_args(self):
@@ -4009,7 +4270,8 @@ def test_wrong_args(self):
     def test_unsupported_types(self):
         from pyspark.sql.functions import pandas_udf, col, PandasUDFType
         schema = StructType(
-            [StructField("id", LongType(), True), StructField("dt", DecimalType(), True)])
+            [StructField("id", LongType(), True),
+             StructField("map", MapType(StringType(), IntegerType()), True)])
         df = self.spark.createDataFrame([(1, None,)], schema=schema)
         f = pandas_udf(lambda x: x, df.schema, PandasUDFType.GROUP_MAP)
         with QuietTest(self.sc):
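
For reference, the array-type tests above map onto a user-visible path: with Arrow enabled, `createDataFrame(pandas_df)` and `toPandas()` now handle `ArrayType` columns instead of falling back or failing. A small sketch, assuming pyarrow >= 0.8.0 is installed (the config key below is the stock Arrow toggle):

```python
from pyspark.sql import SparkSession
from pyspark.sql.types import ArrayType, IntegerType, StructField, StructType

spark = SparkSession.builder.master("local[2]").getOrCreate()
spark.conf.set("spark.sql.execution.arrow.enabled", "true")

schema = StructType([StructField("a", ArrayType(IntegerType()))])
df = spark.createDataFrame([([1, 2],), ([3, 4],)], schema=schema)

# Round-trips through the Arrow serialization path; each cell of column "a"
# comes back as an array-like of ints rather than being rejected as unsupported.
pdf = df.toPandas()
print(pdf["a"][0])
```
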
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 78abc32a35a1c..0dc5823f72a3c 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -1073,7 +1073,7 @@ def _infer_type(obj):
         raise TypeError("not supported type: %s" % type(obj))
 
 
-def _infer_schema(row):
+def _infer_schema(row, names=None):
     """Infer the schema from dict/namedtuple/object"""
     if isinstance(row, dict):
         items = sorted(row.items())
@@ -1084,7 +1084,10 @@ def _infer_schema(row):
         elif hasattr(row, "_fields"):  # namedtuple
             items = zip(row._fields, tuple(row))
         else:
-            names = ['_%d' % i for i in range(1, len(row) + 1)]
+            if names is None:
+                names = ['_%d' % i for i in range(1, len(row) + 1)]
+            elif len(names) < len(row):
+                names.extend('_%d' % i for i in range(len(names) + 1, len(row) + 1))
             items = zip(names, row)
 
     elif hasattr(row, "__dict__"):  # object
@@ -1109,19 +1112,27 @@ def _has_nulltype(dt):
     return isinstance(dt, NullType)
 
 
-def _merge_type(a, b):
+def _merge_type(a, b, name=None):
+    if name is None:
+        new_msg = lambda msg: msg
+        new_name = lambda n: "field %s" % n
+    else:
+        new_msg = lambda msg: "%s: %s" % (name, msg)
+        new_name = lambda n: "field %s in %s" % (n, name)
+
     if isinstance(a, NullType):
         return b
     elif isinstance(b, NullType):
         return a
     elif type(a) is not type(b):
         # TODO: type cast (such as int -> long)
-        raise TypeError("Can not merge type %s and %s" % (type(a), type(b)))
+        raise TypeError(new_msg("Can not merge type %s and %s" % (type(a), type(b))))
 
     # same type
     if isinstance(a, StructType):
         nfs = dict((f.name, f.dataType) for f in b.fields)
-        fields = [StructField(f.name, _merge_type(f.dataType, nfs.get(f.name, NullType())))
+        fields = [StructField(f.name, _merge_type(f.dataType, nfs.get(f.name, NullType()),
+                                                  name=new_name(f.name)))
                   for f in a.fields]
         names = set([f.name for f in fields])
         for n in nfs:
@@ -1130,11 +1141,12 @@ def _merge_type(a, b):
         return StructType(fields)
 
     elif isinstance(a, ArrayType):
-        return ArrayType(_merge_type(a.elementType, b.elementType), True)
+        return ArrayType(_merge_type(a.elementType, b.elementType,
+                                     name='element in array %s' % name), True)
 
     elif isinstance(a, MapType):
-        return MapType(_merge_type(a.keyType, b.keyType),
-                       _merge_type(a.valueType, b.valueType),
+        return MapType(_merge_type(a.keyType, b.keyType, name='key of map %s' % name),
+                       _merge_type(a.valueType, b.valueType, name='value of map %s' % name),
                        True)
     else:
         return a
@@ -1617,7 +1629,7 @@ def to_arrow_type(dt):
     elif type(dt) == DoubleType:
         arrow_type = pa.float64()
     elif type(dt) == DecimalType:
-        arrow_type = pa.decimal(dt.precision, dt.scale)
+        arrow_type = pa.decimal128(dt.precision, dt.scale)
     elif type(dt) == StringType:
         arrow_type = pa.string()
     elif type(dt) == DateType:
@@ -1625,6 +1637,8 @@ def to_arrow_type(dt):
     elif type(dt) == TimestampType:
         # Timestamps should be in UTC, JVM Arrow timestamps require a timezone to be read
         arrow_type = pa.timestamp('us', tz='UTC')
+    elif type(dt) == ArrayType:
+        arrow_type = pa.list_(to_arrow_type(dt.elementType))
     else:
         raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
     return arrow_type
@@ -1642,30 +1656,31 @@ def to_arrow_schema(schema):
 
 def from_arrow_type(at):
     """ Convert pyarrow type to Spark data type.
     """
-    # TODO: newer pyarrow has is_boolean(at) functions that would be better to check type
-    import pyarrow as pa
-    if at == pa.bool_():
+    import pyarrow.types as types
+    if types.is_boolean(at):
         spark_type = BooleanType()
-    elif at == pa.int8():
+    elif types.is_int8(at):
         spark_type = ByteType()
-    elif at == pa.int16():
+    elif types.is_int16(at):
         spark_type = ShortType()
-    elif at == pa.int32():
+    elif types.is_int32(at):
         spark_type = IntegerType()
-    elif at == pa.int64():
+    elif types.is_int64(at):
         spark_type = LongType()
-    elif at == pa.float32():
+    elif types.is_float32(at):
         spark_type = FloatType()
-    elif at == pa.float64():
+    elif types.is_float64(at):
         spark_type = DoubleType()
-    elif type(at) == pa.DecimalType:
+    elif types.is_decimal(at):
         spark_type = DecimalType(precision=at.precision, scale=at.scale)
-    elif at == pa.string():
+    elif types.is_string(at):
         spark_type = StringType()
-    elif at == pa.date32():
+    elif types.is_date32(at):
         spark_type = DateType()
-    elif type(at) == pa.TimestampType:
+    elif types.is_timestamp(at):
         spark_type = TimestampType()
+    elif types.is_list(at):
+        spark_type = ArrayType(from_arrow_type(at.value_type))
     else:
         raise TypeError("Unsupported type in conversion from Arrow: " + str(at))
     return spark_type
@@ -1679,13 +1694,6 @@ def from_arrow_schema(arrow_schema):
         for field in arrow_schema])
 
 
-def _old_pandas_exception_message(e):
-    """ Create an error message for importing old Pandas.
-    """
-    msg = "note: Pandas (>=0.19.2) must be installed and available on calling Python process"
-    return "%s\n%s" % (_exception_message(e), msg)
-
-
 def _check_dataframe_localize_timestamps(pdf, timezone):
     """
     Convert timezone aware timestamps to timezone-naive in the specified timezone or local timezone
@@ -1694,10 +1702,10 @@ def _check_dataframe_localize_timestamps(pdf, timezone):
     :param timezone: the timezone to convert. if None then use local timezone
     :return pandas.DataFrame where any timezone aware columns have been converted to tz-naive
     """
-    try:
-        from pandas.api.types import is_datetime64tz_dtype
-    except ImportError as e:
-        raise ImportError(_old_pandas_exception_message(e))
+    from pyspark.sql.utils import require_minimum_pandas_version
+    require_minimum_pandas_version()
+
+    from pandas.api.types import is_datetime64tz_dtype
     tz = timezone or 'tzlocal()'
     for column, series in pdf.iteritems():
         # TODO: handle nested timestamps, such as ArrayType(TimestampType())?
@@ -1715,10 +1723,10 @@ def _check_series_convert_timestamps_internal(s, timezone):
     :param timezone: the timezone to convert. if None then use local timezone
     :return pandas.Series where if it is a timestamp, has been UTC normalized without a time zone
     """
-    try:
-        from pandas.api.types import is_datetime64_dtype, is_datetime64tz_dtype
-    except ImportError as e:
-        raise ImportError(_old_pandas_exception_message(e))
+    from pyspark.sql.utils import require_minimum_pandas_version
+    require_minimum_pandas_version()
+
+    from pandas.api.types import is_datetime64_dtype, is_datetime64tz_dtype
     # TODO: handle nested timestamps, such as ArrayType(TimestampType())?
     if is_datetime64_dtype(s.dtype):
         tz = timezone or 'tzlocal()'
@@ -1738,11 +1746,11 @@ def _check_series_convert_timestamps_localize(s, from_timezone, to_timezone):
     :param to_timezone: the timezone to convert to. if None then use local timezone
     :return pandas.Series where if it is a timestamp, has been converted to tz-naive
     """
-    try:
-        import pandas as pd
-        from pandas.api.types import is_datetime64tz_dtype, is_datetime64_dtype
-    except ImportError as e:
-        raise ImportError(_old_pandas_exception_message(e))
+    from pyspark.sql.utils import require_minimum_pandas_version
+    require_minimum_pandas_version()
+
+    import pandas as pd
+    from pandas.api.types import is_datetime64tz_dtype, is_datetime64_dtype
     from_tz = from_timezone or 'tzlocal()'
     to_tz = to_timezone or 'tzlocal()'
     # TODO: handle nested timestamps, such as ArrayType(TimestampType())?
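
A possible round trip through the two converters changed above, both internal helpers; this assumes pyarrow >= 0.8.0, where `pa.decimal128` and the `pyarrow.types.is_*` predicates exist:

```python
from pyspark.sql.types import (ArrayType, DecimalType, from_arrow_type,
                               to_arrow_type)

# Spark -> Arrow: decimals now map to decimal128, arrays to Arrow list types.
at = to_arrow_type(ArrayType(DecimalType(38, 18)))
print(at)  # e.g. list<item: decimal128(38, 18)> (exact repr may vary)

# Arrow -> Spark: the is_decimal / is_list predicates take it back.
assert from_arrow_type(at) == ArrayType(DecimalType(38, 18))
```
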
diff --git a/python/pyspark/sql/udf.py b/python/pyspark/sql/udf.py
index d98ecc3e86a2a..7f7be9d339b9f 100644
--- a/python/pyspark/sql/udf.py
+++ b/python/pyspark/sql/udf.py
@@ -34,26 +34,31 @@ def _wrap_function(sc, func, returnType):
 
 
 def _create_udf(f, returnType, evalType):
-    if evalType == PythonEvalType.SQL_PANDAS_SCALAR_UDF:
+
+    if evalType == PythonEvalType.SQL_PANDAS_SCALAR_UDF or \
+            evalType == PythonEvalType.SQL_PANDAS_GROUP_MAP_UDF:
         import inspect
+        from pyspark.sql.utils import require_minimum_pyarrow_version
+
+        require_minimum_pyarrow_version()
         argspec = inspect.getargspec(f)
-        if len(argspec.args) == 0 and argspec.varargs is None:
+
+        if evalType == PythonEvalType.SQL_PANDAS_SCALAR_UDF and len(argspec.args) == 0 and \
+                argspec.varargs is None:
             raise ValueError(
                 "Invalid function: 0-arg pandas_udfs are not supported. "
                 "Instead, create a 1-arg pandas_udf and ignore the arg in your function."
             )
 
-    elif evalType == PythonEvalType.SQL_PANDAS_GROUP_MAP_UDF:
-        import inspect
-        argspec = inspect.getargspec(f)
-        if len(argspec.args) != 1:
+        if evalType == PythonEvalType.SQL_PANDAS_GROUP_MAP_UDF and len(argspec.args) != 1:
             raise ValueError(
                 "Invalid function: pandas_udfs with function type GROUP_MAP "
                 "must take a single arg that is a pandas DataFrame."
             )
 
     # Set the name of the UserDefinedFunction object to be the name of function f
-    udf_obj = UserDefinedFunction(f, returnType=returnType, name=None, evalType=evalType)
+    udf_obj = UserDefinedFunction(
+        f, returnType=returnType, name=None, evalType=evalType, deterministic=True)
     return udf_obj._wrapped()
 
 
@@ -64,8 +69,10 @@ class UserDefinedFunction(object):
     .. versionadded:: 1.3
     """
     def __init__(self, func,
-                 returnType=StringType(), name=None,
-                 evalType=PythonEvalType.SQL_BATCHED_UDF):
+                 returnType=StringType(),
+                 name=None,
+                 evalType=PythonEvalType.SQL_BATCHED_UDF,
+                 deterministic=True):
         if not callable(func):
             raise TypeError(
                 "Invalid function: not a function or callable (__call__ is not defined): "
@@ -89,6 +96,7 @@ def __init__(self, func,
             func.__name__ if hasattr(func, '__name__')
             else func.__class__.__name__)
         self.evalType = evalType
+        self.deterministic = deterministic
 
     @property
     def returnType(self):
@@ -126,7 +134,7 @@ def _create_judf(self):
         wrapped_func = _wrap_function(sc, self.func, self.returnType)
         jdt = spark._jsparkSession.parseDataType(self.returnType.json())
         judf = sc._jvm.org.apache.spark.sql.execution.python.UserDefinedPythonFunction(
-            self._name, wrapped_func, jdt, self.evalType)
+            self._name, wrapped_func, jdt, self.evalType, self.deterministic)
         return judf
 
     def __call__(self, *cols):
@@ -134,6 +142,9 @@ def __call__(self, *cols):
         sc = SparkContext._active_spark_context
         return Column(judf.apply(_to_seq(sc, cols, _to_java_column)))
 
+    # This function is for improving the online help system in the interactive interpreter.
+    # For example, the built-in help / pydoc.help. It wraps the UDF with the docstring and
+    # argument annotation. (See: SPARK-19161)
     def _wrapped(self):
         """
         Wrap this udf with a function and attach docstring from func
@@ -158,5 +169,16 @@ def wrapper(*args):
         wrapper.func = self.func
         wrapper.returnType = self.returnType
         wrapper.evalType = self.evalType
-
+        wrapper.deterministic = self.deterministic
+        wrapper.asNondeterministic = functools.wraps(
+            self.asNondeterministic)(lambda: self.asNondeterministic()._wrapped())
         return wrapper
+
+    def asNondeterministic(self):
+        """
+        Updates UserDefinedFunction to nondeterministic.
+
+        .. versionadded:: 2.3
+        """
+        self.deterministic = False
+        return self
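
From the user side, the new flag looks like the sketch below (it applies equally to `pandas_udf` results, which is what the `random_udf` fixture in the tests exercises). Marking a UDF nondeterministic keeps the optimizer from deduplicating or re-running its invocations on the assumption that they are pure:

```python
import random
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
from pyspark.sql.types import DoubleType

spark = SparkSession.builder.master("local[2]").getOrCreate()

# Each call site keeps its own evaluation instead of being collapsed.
rand_udf = udf(lambda: random.random(), DoubleType()).asNondeterministic()
df = spark.range(3).select(rand_udf().alias("r1"), rand_udf().alias("r2"))
```

Using such a UDF inside an aggregate is rejected with an `AnalysisException`, which is exactly what `test_nondeterministic_udf_in_aggregate` above pins down.
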
diff --git a/python/pyspark/sql/utils.py b/python/pyspark/sql/utils.py
index 7bc6a59ad3b26..08c34c6dccc5e 100644
--- a/python/pyspark/sql/utils.py
+++ b/python/pyspark/sql/utils.py
@@ -110,3 +110,23 @@ def toJArray(gateway, jtype, arr):
     for i in range(0, len(arr)):
         jarr[i] = arr[i]
     return jarr
+
+
+def require_minimum_pandas_version():
+    """ Raise ImportError if minimum version of Pandas is not installed
+    """
+    from distutils.version import LooseVersion
+    import pandas
+    if LooseVersion(pandas.__version__) < LooseVersion('0.19.2'):
+        raise ImportError("Pandas >= 0.19.2 must be installed on calling Python process; "
+                          "however, your version was %s." % pandas.__version__)
+
+
+def require_minimum_pyarrow_version():
+    """ Raise ImportError if minimum version of pyarrow is not installed
+    """
+    from distutils.version import LooseVersion
+    import pyarrow
+    if LooseVersion(pyarrow.__version__) < LooseVersion('0.8.0'):
+        raise ImportError("pyarrow >= 0.8.0 must be installed on calling Python process; "
+                          "however, your version was %s." % pyarrow.__version__)
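
Both guards share the same shape; an illustrative generalization (the `require_minimum_version` helper below is hypothetical, not part of this patch):

```python
from distutils.version import LooseVersion


def require_minimum_version(package, minimum):
    """Fail fast with a versioned ImportError instead of an obscure
    attribute error later in the pandas/Arrow code paths."""
    mod = __import__(package)
    if LooseVersion(mod.__version__) < LooseVersion(minimum):
        raise ImportError("%s >= %s must be installed on calling Python process; "
                          "however, your version was %s."
                          % (package, minimum, mod.__version__))


require_minimum_version("pandas", "0.19.2")
require_minimum_version("pyarrow", "0.8.0")
```

The same floors are declared for installation in `python/setup.py` below, where the `sql` extra now pulls in both `pandas>=0.19.2` and `pyarrow>=0.8.0`.
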
diff --git a/python/pyspark/streaming/flume.py b/python/pyspark/streaming/flume.py
index 5a975d050b0d8..5de448114ece8 100644
--- a/python/pyspark/streaming/flume.py
+++ b/python/pyspark/streaming/flume.py
@@ -20,6 +20,8 @@
     from io import BytesIO
 else:
     from StringIO import StringIO
+import warnings
+
 from py4j.protocol import Py4JJavaError
 
 from pyspark.storagelevel import StorageLevel
diff --git a/python/pyspark/version.py b/python/pyspark/version.py
index 12dd53b9d2902..b9c2c4ced71d5 100644
--- a/python/pyspark/version.py
+++ b/python/pyspark/version.py
@@ -16,4 +16,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "2.3.0.dev0"
+__version__ = "2.4.0.dev0"
diff --git a/python/setup.py b/python/setup.py
index 310670e697a83..251d4526d4dd0 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -201,7 +201,7 @@ def _supports_symlinks():
         extras_require={
             'ml': ['numpy>=1.7'],
             'mllib': ['numpy>=1.7'],
-            'sql': ['pandas>=0.19.2']
+            'sql': ['pandas>=0.19.2', 'pyarrow>=0.8.0']
         },
         classifiers=[
             'Development Status :: 5 - Production/Stable',
@@ -210,6 +210,7 @@ def _supports_symlinks():
             'Programming Language :: Python :: 3',
             'Programming Language :: Python :: 3.4',
             'Programming Language :: Python :: 3.5',
+            'Programming Language :: Python :: 3.6',
             'Programming Language :: Python :: Implementation :: CPython',
             'Programming Language :: Python :: Implementation :: PyPy']
     )
diff --git a/repl/pom.xml b/repl/pom.xml
index 1cb0098d0eca3..6f4a863c48bc7 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.3.0-SNAPSHOT</version>
+    <version>2.4.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkExprTyper.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkExprTyper.scala
new file mode 100644
index 0000000000000..724ce9af49f77
--- /dev/null
+++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkExprTyper.scala
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.repl
+
+import scala.tools.nsc.interpreter.{ExprTyper, IR}
+
+trait SparkExprTyper extends ExprTyper {
+
+  import repl._
+  import global.{reporter => _, Import => _, _}
+  import naming.freshInternalVarName
+
+  def doInterpret(code: String): IR.Result = {
+    // interpret/interpretSynthetic may change the phase,
+    // which would have unintended effects on types.
+    val savedPhase = phase
+    try interpretSynthetic(code) finally phase = savedPhase
+  }
+
+  override def symbolOfLine(code: String): Symbol = {
+    def asExpr(): Symbol = {
+      val name = freshInternalVarName()
+      // Typing it with a lazy val would give us the right type, but runs
+      // into compiler bugs with things like existentials, so we compile it
+      // behind a def and strip the NullaryMethodType which wraps the expr.
+      val line = "def " + name + " = " + code
+
+      doInterpret(line) match {
+        case IR.Success =>
+          val sym0 = symbolOfTerm(name)
+          // drop NullaryMethodType
+          sym0.cloneSymbol setInfo exitingTyper(sym0.tpe_*.finalResultType)
+        case _ => NoSymbol
+      }
+    }
+
+    def asDefn(): Symbol = {
+      val old = repl.definedSymbolList.toSet
+
+      doInterpret(code) match {
+        case IR.Success =>
+          repl.definedSymbolList filterNot old match {
+            case Nil => NoSymbol
+            case sym :: Nil => sym
+            case syms => NoSymbol.newOverloaded(NoPrefix, syms)
+          }
+        case _ => NoSymbol
+      }
+    }
+
+    def asError(): Symbol = {
+      doInterpret(code)
+      NoSymbol
+    }
+
+    beSilentDuring(asExpr()) orElse beSilentDuring(asDefn()) orElse asError()
+  }
+
+}
diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala
index 3ce7cc7c85f74..e69441a475e9a 100644
--- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala
+++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala
@@ -35,6 +35,10 @@ class SparkILoop(in0: Option[BufferedReader], out: JPrintWriter)
   def this(in0: BufferedReader, out: JPrintWriter) = this(Some(in0), out)
   def this() = this(None, new JPrintWriter(Console.out, true))
 
+  override def createInterpreter(): Unit = {
+    intp = new SparkILoopInterpreter(settings, out)
+  }
+
   val initializationCommands: Seq[String] = Seq(
     """
     @transient val spark = if (org.apache.spark.repl.Main.sparkSession != null) {
diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoopInterpreter.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoopInterpreter.scala
new file mode 100644
index 0000000000000..e736607a9a6b9
--- /dev/null
+++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoopInterpreter.scala
@@ -0,0 +1,239 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.repl
+
+import scala.collection.mutable
+import scala.tools.nsc.Settings
+import scala.tools.nsc.interpreter._
+
+class SparkILoopInterpreter(settings: Settings, out: JPrintWriter) extends IMain(settings, out) {
+  self =>
+
+  override lazy val memberHandlers = new {
+    val intp: self.type = self
+  } with MemberHandlers {
+    import intp.global._
+
+    override def chooseHandler(member: intp.global.Tree): MemberHandler = member match {
+      case member: Import => new SparkImportHandler(member)
+      case _ => super.chooseHandler(member)
+    }
+
+    class SparkImportHandler(imp: Import) extends ImportHandler(imp: Import) {
+
+      override def targetType: Type = intp.global.rootMirror.getModuleIfDefined("" + expr) match {
+        case NoSymbol => intp.typeOfExpression("" + expr)
+        case sym => sym.tpe
+      }
+
+      private def safeIndexOf(name: Name, s: String): Int = fixIndexOf(name, pos(name, s))
+      private def fixIndexOf(name: Name, idx: Int): Int = if (idx == name.length) -1 else idx
+      private def pos(name: Name, s: String): Int = {
+        var i = name.pos(s.charAt(0), 0)
+        val sLen = s.length()
+        if (sLen == 1) return i
+        while (i + sLen <= name.length) {
+          var j = 1
+          while (s.charAt(j) == name.charAt(i + j)) {
+            j += 1
+            if (j == sLen) return i
+          }
+          i = name.pos(s.charAt(0), i + 1)
+        }
+        name.length
+      }
+
+      private def isFlattenedSymbol(sym: Symbol): Boolean =
+        sym.owner.isPackageClass &&
+          sym.name.containsName(nme.NAME_JOIN_STRING) &&
+          sym.owner.info.member(sym.name.take(
+            safeIndexOf(sym.name, nme.NAME_JOIN_STRING))) != NoSymbol
+
+      private def importableTargetMembers =
+        importableMembers(exitingTyper(targetType)).filterNot(isFlattenedSymbol).toList
+
+      def isIndividualImport(s: ImportSelector): Boolean =
+        s.name != nme.WILDCARD && s.rename != nme.WILDCARD
+      def isWildcardImport(s: ImportSelector): Boolean =
+        s.name == nme.WILDCARD
+
+      // non-wildcard imports
+      private def individualSelectors = selectors filter isIndividualImport
+
+      override val importsWildcard: Boolean = selectors exists isWildcardImport
+
+      lazy val importableSymbolsWithRenames: List[(Symbol, Name)] = {
+        val selectorRenameMap =
+          individualSelectors.flatMap(x => x.name.bothNames zip x.rename.bothNames).toMap
+        importableTargetMembers flatMap (m => selectorRenameMap.get(m.name) map (m -> _))
+      }
+
+      override lazy val individualSymbols: List[Symbol] = importableSymbolsWithRenames map (_._1)
+      override lazy val wildcardSymbols: List[Symbol] =
+        if (importsWildcard) importableTargetMembers else Nil
+
+    }
+
+  }
+
+  object expressionTyper extends {
+    val repl: SparkILoopInterpreter.this.type = self
+  } with SparkExprTyper { }
+
+  override def symbolOfLine(code: String): global.Symbol =
+    expressionTyper.symbolOfLine(code)
+
+  override def typeOfExpression(expr: String, silent: Boolean): global.Type =
+    expressionTyper.typeOfExpression(expr, silent)
+
+
+  import global.Name
+  override def importsCode(wanted: Set[Name], wrapper: Request#Wrapper,
+      definesClass: Boolean, generousImports: Boolean): ComputedImports = {
+
+    import global._
+    import definitions.{ ObjectClass, ScalaPackage, JavaLangPackage, PredefModule }
+    import memberHandlers._
+
+    val header, code, trailingBraces, accessPath = new StringBuilder
+    val currentImps = mutable.HashSet[Name]()
+    // only emit predef import header if name not resolved in history, loosely
+    var predefEscapes = false
+
+    /**
+     * Narrow down the list of requests from which imports
+     * should be taken.  Removes requests which cannot contribute
+     * useful imports for the specified set of wanted names.
+     */
+    case class ReqAndHandler(req: Request, handler: MemberHandler)
+
+    def reqsToUse: List[ReqAndHandler] = {
+      /**
+       * Loop through a list of MemberHandlers and select which ones to keep.
+       * 'wanted' is the set of names that need to be imported.
+       */
+      def select(reqs: List[ReqAndHandler], wanted: Set[Name]): List[ReqAndHandler] = {
+        // Single symbol imports might be implicits! See bug #1752.  Rather than
+        // try to finesse this, we will mimic all imports for now.
+        def keepHandler(handler: MemberHandler) = handler match {
+          // While defining classes in class based mode - implicits are not needed.
+          case h: ImportHandler if isClassBased && definesClass =>
+            h.importedNames.exists(x => wanted.contains(x))
+          case _: ImportHandler => true
+          case x if generousImports => x.definesImplicit ||
+            (x.definedNames exists (d => wanted.exists(w => d.startsWith(w))))
+          case x => x.definesImplicit ||
+            (x.definedNames exists wanted)
+        }
+
+        reqs match {
+          case Nil =>
+            predefEscapes = wanted contains PredefModule.name ; Nil
+          case rh :: rest if !keepHandler(rh.handler) => select(rest, wanted)
+          case rh :: rest =>
+            import rh.handler._
+            val augment = rh match {
+              case ReqAndHandler(_, _: ImportHandler) => referencedNames
+              case _ => Nil
+            }
+            val newWanted = wanted ++ augment -- definedNames -- importedNames
+            rh :: select(rest, newWanted)
+        }
+      }
+
+      /** Flatten the handlers out and pair each with the original request */
+      select(allReqAndHandlers reverseMap { case (r, h) => ReqAndHandler(r, h) }, wanted).reverse
+    }
+
+    // add code for a new object to hold some imports
+    def addWrapper() {
+      import nme.{ INTERPRETER_IMPORT_WRAPPER => iw }
+      code append (wrapper.prewrap format iw)
+      trailingBraces append wrapper.postwrap
+      accessPath append s".$iw"
+      currentImps.clear()
+    }
+
+    def maybeWrap(names: Name*) = if (names exists currentImps) addWrapper()
+
+    def wrapBeforeAndAfter[T](op: => T): T = {
+      addWrapper()
+      try op finally addWrapper()
+    }
+
+    // imports from Predef are relocated to the template header to allow hiding.
+    def checkHeader(h: ImportHandler) = h.referencedNames contains PredefModule.name
+
+    // loop through previous requests, adding imports for each one
+    wrapBeforeAndAfter {
+      // Reuse a single temporary value when importing from a line with multiple definitions.
+      val tempValLines = mutable.Set[Int]()
+      for (ReqAndHandler(req, handler) <- reqsToUse) {
+        val objName = req.lineRep.readPathInstance
+        handler match {
+          case h: ImportHandler if checkHeader(h) =>
+            header.clear()
+            header append f"${h.member}%n"
+          // If the user entered an import, then just use it; add an import wrapping
+          // level if the import might conflict with some other import
+          case x: ImportHandler if x.importsWildcard =>
+            wrapBeforeAndAfter(code append (x.member + "\n"))
+          case x: ImportHandler =>
+            maybeWrap(x.importedNames: _*)
+            code append (x.member + "\n")
+            currentImps ++= x.importedNames
+
+          case x if isClassBased =>
+            for (sym <- x.definedSymbols) {
+              maybeWrap(sym.name)
+              x match {
+                case _: ClassHandler =>
+                  code.append(s"import ${objName}${req.accessPath}.`${sym.name}`\n")
+                case _ =>
+                  val valName = s"${req.lineRep.packageName}${req.lineRep.readName}"
+                  if (!tempValLines.contains(req.lineRep.lineId)) {
+                    code.append(s"val $valName: ${objName}.type = $objName\n")
+                    tempValLines += req.lineRep.lineId
+                  }
+                  code.append(s"import ${valName}${req.accessPath}.`${sym.name}`\n")
+              }
+              currentImps += sym.name
+            }
+          // For other requests, import each defined name.
+          // import them explicitly instead of with _, so that
+          // ambiguity errors will not be generated. Also, quote
+          // the name of the variable, so that we don't need to
+          // handle quoting keywords separately.
+          case x =>
+            for (sym <- x.definedSymbols) {
+              maybeWrap(sym.name)
+              code append s"import ${x.path}\n"
+              currentImps += sym.name
+            }
+        }
+      }
+    }
+
+    val computedHeader = if (predefEscapes) header.toString else ""
+    ComputedImports(computedHeader, code.toString, trailingBraces.toString, accessPath.toString)
+  }
+
+  private def allReqAndHandlers =
+    prevRequestList flatMap (req => req.handlers map (req -> _))
+
+}
diff --git a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala
index 905b41cdc1594..cdd5cdd841740 100644
--- a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala
+++ b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala
@@ -227,4 +227,35 @@ class ReplSuite extends SparkFunSuite {
     assertDoesNotContain("error: not found: value sc", output)
   }
 
+  test("spark-shell should find imported types in class constructors and extends clause") {
+    val output = runInterpreter("local",
+      """
+        |import org.apache.spark.Partition
+        |class P(p: Partition)
+        |class P(val index: Int) extends Partition
+      """.stripMargin)
+    assertDoesNotContain("error: not found: type Partition", output)
+  }
+
+  test("spark-shell should shadow val/def definitions correctly") {
+    val output1 = runInterpreter("local",
+      """
+        |def myMethod() = "first definition"
+        |val tmp = myMethod(); val out = tmp
+        |def myMethod() = "second definition"
+        |val tmp = myMethod(); val out = s"$tmp aabbcc"
+      """.stripMargin)
+    assertContains("second definition aabbcc", output1)
+
+    val output2 = runInterpreter("local",
+      """
+        |val a = 1
+        |val b = a; val c = b;
+        |val a = 2
+        |val b = a; val c = b;
+        |s"!!$b!!"
+      """.stripMargin)
+    assertContains("!!2!!", output2)
+  }
+
 }
diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml
index a4b18c527c969..2240d0e84eb5c 100644
--- a/resource-managers/kubernetes/core/pom.xml
+++ b/resource-managers/kubernetes/core/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
    <artifactId>spark-parent_2.11</artifactId>
-    <version>2.3.0-SNAPSHOT</version>
+    <version>2.4.0-SNAPSHOT</version>
     <relativePath>../../../pom.xml</relativePath>
  </parent>
@@ -29,7 +29,7 @@
   <name>Spark Project Kubernetes</name>
   <properties>
     <sbt.project.name>kubernetes</sbt.project.name>
-    <kubernetes.client.version>2.2.13</kubernetes.client.version>
+    <kubernetes.client.version>3.0.0</kubernetes.client.version>
   </properties>
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/LoggingPodStatusWatcher.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/LoggingPodStatusWatcher.scala
index c645b008d736d..fdc6e0a61f73a 100644
--- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/LoggingPodStatusWatcher.scala
+++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/LoggingPodStatusWatcher.scala
@@ -109,7 +109,7 @@ private[k8s] class LoggingPodStatusWatcherImpl(
       ("namespace", pod.getMetadata.getNamespace()),
       ("labels", pod.getMetadata.getLabels().asScala.mkString(", ")),
       ("pod uid", pod.getMetadata.getUid),
-      ("creation time", pod.getMetadata.getCreationTimestamp()),
+      ("creation time", pod.getMetadata.getCreationTimestamp.getTime),
 
       // spec details
       ("service account name", pod.getSpec.getServiceAccountName()),
@@ -117,7 +117,7 @@ private[k8s] class LoggingPodStatusWatcherImpl(
       ("node name", pod.getSpec.getNodeName()),
 
       // status
-      ("start time", pod.getStatus.getStartTime),
+      ("start time", pod.getStatus.getStartTime.getTime),
       ("container images",
         pod.getStatus.getContainerStatuses()
           .asScala
@@ -162,7 +162,7 @@ private[k8s] class LoggingPodStatusWatcherImpl(
       case running: ContainerStateRunning =>
         Seq(
           ("Container state", "Running"),
-          ("Container started at", running.getStartedAt))
+          ("Container started at", running.getStartedAt.getTime))
       case waiting: ContainerStateWaiting =>
         Seq(
           ("Container state", "Waiting"),
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala
index 1e97ece88de1e..6c70c6509c208 100644
--- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala
+++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala
@@ -487,4 +487,3 @@ private object KubernetesClusterSchedulerBackend {
       " Consider boosting spark executor memory overhead."
   }
 }
-
diff --git a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml
index 85827a17a963f..80951d691c644 100644
--- a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml
+++ b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.3.0-SNAPSHOT</version>
+    <version>2.4.0-SNAPSHOT</version>
     <relativePath>../../../pom.xml</relativePath>
   </parent>
diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml
index c4fe21604fb44..91a7482546419 100644
--- a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml
+++ b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.3.0-SNAPSHOT</version>
+    <version>2.4.0-SNAPSHOT</version>
     <relativePath>../../../pom.xml</relativePath>
   </parent>
diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml
index db5ec86e0dfd8..eab95c5806eb8 100644
--- a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml
+++ b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.3.0-SNAPSHOT</version>
+    <version>2.4.0-SNAPSHOT</version>
     <relativePath>../../../pom.xml</relativePath>
   </parent>
diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml
index 2f1587e0d6bb4..58b9dd00e5294 100644
--- a/resource-managers/kubernetes/integration-tests/pom.xml
+++ b/resource-managers/kubernetes/integration-tests/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.3.0-SNAPSHOT</version>
+    <version>2.4.0-SNAPSHOT</version>
     <relativePath>../../../pom.xml</relativePath>
   </parent>
diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml
index de8f1c913651d..3995d0afeb5f4 100644
--- a/resource-managers/mesos/pom.xml
+++ b/resource-managers/mesos/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.3.0-SNAPSHOT</version>
+    <version>2.4.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
@@ -29,7 +29,7 @@
   <name>Spark Project Mesos</name>
   <properties>
     <sbt.project.name>mesos</sbt.project.name>
-    <mesos.version>1.3.0</mesos.version>
+    <mesos.version>1.4.0</mesos.version>
     <mesos.classifier>shaded-protobuf</mesos.classifier>
diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala
index ff60b88c6d533..68f6921153d89 100644
--- a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala
+++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala
@@ -77,10 +77,17 @@ private[mesos] class MesosSubmitRequestServlet(
   private def buildDriverDescription(request: CreateSubmissionRequest): MesosDriverDescription = {
     // Required fields, including the main class because python is not yet supported
     val appResource = Option(request.appResource).getOrElse {
-      throw new SubmitRestMissingFieldException("Application jar is missing.")
+      throw new SubmitRestMissingFieldException("Application jar 'appResource' is missing.")
     }
     val mainClass = Option(request.mainClass).getOrElse {
-      throw new SubmitRestMissingFieldException("Main class is missing.")
+      throw new SubmitRestMissingFieldException("Main class 'mainClass' is missing.")
+    }
+    val appArgs = Option(request.appArgs).getOrElse {
+      throw new SubmitRestMissingFieldException("Application arguments 'appArgs' are missing.")
+    }
+    val environmentVariables = Option(request.environmentVariables).getOrElse {
+      throw new SubmitRestMissingFieldException("Environment variables 'environmentVariables' " +
+        "are missing.")
     }
 
     // Optional fields
@@ -91,8 +98,6 @@ private[mesos] class MesosSubmitRequestServlet(
     val superviseDriver = sparkProperties.get("spark.driver.supervise")
     val driverMemory = sparkProperties.get("spark.driver.memory")
     val driverCores = sparkProperties.get("spark.driver.cores")
-    val appArgs = request.appArgs
-    val environmentVariables = request.environmentVariables
     val name = request.sparkProperties.getOrElse("spark.app.name", mainClass)
 
     // Construct driver description
diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala
index c41283e4a3e39..d224a7325820a 100644
--- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala
+++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala
@@ -36,7 +36,6 @@ import org.apache.spark.deploy.rest.{CreateSubmissionResponse, KillSubmissionRes
 import org.apache.spark.metrics.MetricsSystem
 import org.apache.spark.util.Utils
 
-
 /**
  * Tracks the current state of a Mesos Task that runs a Spark driver.
  * @param driverDescription Submitted driver description from
diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
index c392061fdb358..53f5f61cca486 100644
--- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
+++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
@@ -92,6 +92,8 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
   private[this] var stopCalled: Boolean = false
 
   private val launcherBackend = new LauncherBackend() {
+    override protected def conf: SparkConf = sc.conf
+
     override protected def onStopRequest(): Unit = {
       stopSchedulerBackend()
       setState(SparkAppHandle.State.KILLED)
@@ -400,13 +402,20 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
       val offerMem = getResource(offer.getResourcesList, "mem")
       val offerCpus = getResource(offer.getResourcesList, "cpus")
       val offerPorts = getRangeResource(offer.getResourcesList, "ports")
+      val offerReservationInfo = offer
+        .getResourcesList
+        .asScala
+        .find { r => r.getReservation != null }
       val id = offer.getId.getValue
 
       if (tasks.contains(offer.getId)) { // accept
         val offerTasks = tasks(offer.getId)
 
         logDebug(s"Accepting offer: $id with attributes: $offerAttributes " +
-          s"mem: $offerMem cpu: $offerCpus ports: $offerPorts." +
+          offerReservationInfo.map(resInfo =>
+            s"reservation info: ${resInfo.getReservation.toString}").getOrElse("") +
+          s"mem: $offerMem cpu: $offerCpus ports: $offerPorts " +
+          s"resources: ${offer.getResourcesList.asScala.mkString(",")}." +
           s" Launching ${offerTasks.size} Mesos tasks.")
 
         for (task <- offerTasks) {
@@ -416,7 +425,7 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
           val ports = getRangeResource(task.getResourcesList, "ports").mkString(",")
 
           logDebug(s"Launching Mesos task: ${taskId.getValue} with mem: $mem cpu: $cpus" +
-            s" ports: $ports")
+            s" ports: $ports" + s" on slave with slave id: ${task.getSlaveId.getValue} ")
         }
 
         driver.launchTasks(
@@ -431,7 +440,8 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
       } else {
         declineOffer(
           driver,
-          offer)
+          offer,
+          Some("Offer was declined due to unmet task launch constraints."))
       }
     }
   }
@@ -513,6 +523,9 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
           totalGpusAcquired += taskGPUs
           gpusByTaskId(taskId) = taskGPUs
         }
+      } else {
+        logDebug(s"Cannot launch a task for offer with id: $offerId on slave " +
+          s"with id: $slaveId. Requirements were not met for this offer.")
       }
     }
   }
diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala
index 6fcb30af8a733..e75450369ad85 100644
--- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala
+++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala
@@ -28,7 +28,8 @@ import com.google.common.base.Splitter
 import org.apache.mesos.{MesosSchedulerDriver, Protos, Scheduler, SchedulerDriver}
 import org.apache.mesos.Protos.{TaskState => MesosTaskState, _}
 import org.apache.mesos.Protos.FrameworkInfo.Capability
-import org.apache.mesos.protobuf.{ByteString, GeneratedMessage}
+import org.apache.mesos.Protos.Resource.ReservationInfo
+import org.apache.mesos.protobuf.{ByteString, GeneratedMessageV3}
 
 import org.apache.spark.{SparkConf, SparkContext, SparkException}
 import org.apache.spark.TaskState
@@ -36,8 +37,6 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config._
 import org.apache.spark.util.Utils
 
-
-
 /**
  * Shared trait for implementing a Mesos Scheduler. This holds common state and helper
  * methods and Mesos scheduler will use.
 */
 trait MesosSchedulerUtils extends Logging {
   // Lock used to wait for scheduler to be registered
   private final val registerLatch = new CountDownLatch(1)
 
+  private final val ANY_ROLE = "*"
+
   /**
    * Creates a new MesosSchedulerDriver that communicates to the Mesos master.
    *
@@ -175,17 +176,36 @@ trait MesosSchedulerUtils extends Logging {
     registerLatch.countDown()
   }
 
-  def createResource(name: String, amount: Double, role: Option[String] = None): Resource = {
+  private def setReservationInfo(
+      reservationInfo: Option[ReservationInfo],
+      role: Option[String],
+      builder: Resource.Builder): Unit = {
+    if (!role.contains(ANY_ROLE)) {
+      reservationInfo.foreach { res => builder.setReservation(res) }
+    }
+  }
+
+  def createResource(
+      name: String,
+      amount: Double,
+      role: Option[String] = None,
+      reservationInfo: Option[ReservationInfo] = None): Resource = {
     val builder = Resource.newBuilder()
       .setName(name)
       .setType(Value.Type.SCALAR)
       .setScalar(Value.Scalar.newBuilder().setValue(amount).build())
-
     role.foreach { r => builder.setRole(r) }
-
+    setReservationInfo(reservationInfo, role, builder)
     builder.build()
   }
 
+  private def getReservation(resource: Resource): Option[ReservationInfo] = {
+    if (resource.hasReservation) {
+      Some(resource.getReservation)
+    } else {
+      None
+    }
+  }
+
   /**
    * Partition the existing set of resources into two groups, those remaining to be
    * scheduled and those requested to be used for a new task.
@@ -203,14 +223,17 @@ trait MesosSchedulerUtils extends Logging {
     var requestedResources = new ArrayBuffer[Resource]
     val remainingResources = resources.asScala.map {
       case r =>
+        val reservation = getReservation(r)
         if (remain > 0 &&
           r.getType == Value.Type.SCALAR &&
           r.getScalar.getValue > 0.0 &&
           r.getName == resourceName) {
           val usage = Math.min(remain, r.getScalar.getValue)
-          requestedResources += createResource(resourceName, usage, Some(r.getRole))
+          requestedResources += createResource(resourceName, usage,
+            Option(r.getRole), reservation)
           remain -= usage
-          createResource(resourceName, r.getScalar.getValue - usage, Some(r.getRole))
+          createResource(resourceName, r.getScalar.getValue - usage,
+            Option(r.getRole), reservation)
         } else {
           r
         }
@@ -228,16 +251,6 @@ trait MesosSchedulerUtils extends Logging {
       (attr.getName, attr.getText.getValue.split(',').toSet)
     }
 
-
-  /** Build a Mesos resource protobuf object */
-  protected def createResource(resourceName: String, quantity: Double): Protos.Resource = {
-    Resource.newBuilder()
-      .setName(resourceName)
-      .setType(Value.Type.SCALAR)
-      .setScalar(Value.Scalar.newBuilder().setValue(quantity).build())
-      .build()
-  }
-
   /**
    * Converts the attributes from the resource offer into a Map of name to Attribute Value
    * The attribute values are the mesos attribute types and they are
@@ -245,7 +258,8 @@ trait MesosSchedulerUtils extends Logging {
    * @param offerAttributes the attributes offered
    * @return
    */
-  protected def toAttributeMap(offerAttributes: JList[Attribute]): Map[String, GeneratedMessage] = {
+  protected def toAttributeMap(offerAttributes: JList[Attribute])
+    : Map[String, GeneratedMessageV3] = {
     offerAttributes.asScala.map { attr =>
       val attrValue = attr.getType match {
         case Value.Type.SCALAR => attr.getScalar
@@ -266,7 +280,7 @@ trait MesosSchedulerUtils extends Logging {
    */
   def matchesAttributeRequirements(
       slaveOfferConstraints: Map[String, Set[String]],
-      offerAttributes: Map[String, GeneratedMessage]): Boolean = {
+      offerAttributes: Map[String, GeneratedMessageV3]): Boolean = {
     slaveOfferConstraints.forall {
       // offer has the required attribute and subsumes the required values for that attribute
       case (name, requiredValues) =>
@@ -427,10 +441,10 @@ trait MesosSchedulerUtils extends Logging {
     // partition port offers
     val (resourcesWithoutPorts, portResources) = filterPortResources(offeredResources)
 
-    val portsAndRoles = requestedPorts.
-      map(x => (x, findPortAndGetAssignedRangeRole(x, portResources)))
+    val portsAndResourceInfo = requestedPorts.
+      map { x => (x, findPortAndGetAssignedResourceInfo(x, portResources)) }
 
-    val assignedPortResources = createResourcesFromPorts(portsAndRoles)
+    val assignedPortResources = createResourcesFromPorts(portsAndResourceInfo)
 
     // ignore non-assigned port resources, they will be declined implicitly by mesos
     // no need for splitting port resources.
@@ -450,16 +464,25 @@ trait MesosSchedulerUtils extends Logging {
     managedPortNames.map(conf.getLong(_, 0)).filter( _ != 0)
   }
 
+  private case class RoleResourceInfo(
+      role: String,
+      resInfo: Option[ReservationInfo])
+
   /** Creates a mesos resource for a specific port number. */
-  private def createResourcesFromPorts(portsAndRoles: List[(Long, String)]) : List[Resource] = {
-    portsAndRoles.flatMap{ case (port, role) =>
-      createMesosPortResource(List((port, port)), Some(role))}
+  private def createResourcesFromPorts(
+      portsAndResourcesInfo: List[(Long, RoleResourceInfo)])
+    : List[Resource] = {
+    portsAndResourcesInfo.flatMap { case (port, rInfo) =>
+      createMesosPortResource(List((port, port)), Option(rInfo.role), rInfo.resInfo)}
   }
 
   /** Helper to create mesos resources for specific port ranges. */
   private def createMesosPortResource(
       ranges: List[(Long, Long)],
-      role: Option[String] = None): List[Resource] = {
+      role: Option[String] = None,
+      reservationInfo: Option[ReservationInfo] = None): List[Resource] = {
+    // For the port ranges we are going to use (user-defined ports fall in there), create Mesos
+    // resources; each range has a role associated with it.
     ranges.map { case (rangeStart, rangeEnd) =>
       val rangeValue = Value.Range.newBuilder()
         .setBegin(rangeStart)
@@ -468,7 +491,8 @@ trait MesosSchedulerUtils extends Logging {
         .setName("ports")
         .setType(Value.Type.RANGES)
         .setRanges(Value.Ranges.newBuilder().addRange(rangeValue))
-      role.foreach(r => builder.setRole(r))
+      role.foreach { r => builder.setRole(r) }
+      setReservationInfo(reservationInfo, role, builder)
       builder.build()
     }
   }
 
@@ -477,19 +501,21 @@ trait MesosSchedulerUtils extends Logging {
    * Helper to assign a port to an offered range and get the latter's role
    * info to use it later on.
   */
-  private def findPortAndGetAssignedRangeRole(port: Long, portResources: List[Resource])
-    : String = {
+  private def findPortAndGetAssignedResourceInfo(port: Long, portResources: List[Resource])
+    : RoleResourceInfo = {
 
     val ranges = portResources.
-      map(resource =>
-        (resource.getRole, resource.getRanges.getRangeList.asScala
-          .map(r => (r.getBegin, r.getEnd)).toList))
+      map { resource =>
+        val reservation = getReservation(resource)
+        (RoleResourceInfo(resource.getRole, reservation),
+          resource.getRanges.getRangeList.asScala.map(r => (r.getBegin, r.getEnd)).toList)
+      }
 
-    val rangePortRole = ranges
-      .find { case (role, rangeList) => rangeList
+    val rangePortResourceInfo = ranges
+      .find { case (resourceInfo, rangeList) => rangeList
        .exists{ case (rangeStart, rangeEnd) => rangeStart <= port & rangeEnd >= port}}
     // this is safe since we have previously checked about the ranges (see checkPorts method)
-    rangePortRole.map{ case (role, rangeList) => role}.get
+    rangePortResourceInfo.map{ case (resourceInfo, rangeList) => resourceInfo}.get
   }
 
   /** Retrieves the port resources from a list of mesos offered resources */
@@ -564,3 +590,4 @@ trait MesosSchedulerUtils extends Logging {
     }
   }
 }
+
+ sparkConf.getAll.foreach { case (k, v) => + sys.props(k) = v + } + + private val yarnConf = new YarnConfiguration(SparkHadoopUtil.newConfiguration(sparkConf)) + private val ugi = { val original = UserGroupInformation.getCurrentUser() @@ -311,7 +328,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends val credentialManager = new YARNHadoopDelegationTokenManager( sparkConf, yarnConf, - conf => YarnSparkHadoopUtil.get.hadoopFSsToAccess(sparkConf, conf)) + conf => YarnSparkHadoopUtil.hadoopFSsToAccess(sparkConf, conf)) val credentialRenewer = new AMCredentialRenewer(sparkConf, yarnConf, credentialManager) @@ -323,13 +340,10 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends credentialRenewerThread.join() } - // Call this to force generation of secret so it gets populated into the Hadoop UGI. - val securityMgr = new SecurityManager(sparkConf) - if (isClusterMode) { - runDriver(securityMgr) + runDriver() } else { - runExecutorLauncher(securityMgr) + runExecutorLauncher() } } catch { case e: Exception => @@ -410,15 +424,11 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends _sparkConf: SparkConf, _rpcEnv: RpcEnv, driverRef: RpcEndpointRef, - uiAddress: Option[String], - securityMgr: SecurityManager) = { + uiAddress: Option[String]) = { val appId = client.getAttemptId().getApplicationId().toString() val attemptId = client.getAttemptId().getAttemptId().toString() - val historyAddress = - _sparkConf.get(HISTORY_SERVER_ADDRESS) - .map { text => SparkHadoopUtil.get.substituteHadoopVariables(text, yarnConf) } - .map { address => s"${address}${HistoryServer.UI_PATH_PREFIX}/${appId}/${attemptId}" } - .getOrElse("") + val historyAddress = ApplicationMaster + .getHistoryServerAddress(_sparkConf, yarnConf, appId, attemptId) val driverUrl = RpcEndpointAddress( _sparkConf.get("spark.driver.host"), @@ -463,7 +473,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends YarnSchedulerBackend.ENDPOINT_NAME) } - private def runDriver(securityMgr: SecurityManager): Unit = { + private def runDriver(): Unit = { addAmIpFilter(None) userClassThread = startUserApplication() @@ -479,7 +489,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends val driverRef = createSchedulerRef( sc.getConf.get("spark.driver.host"), sc.getConf.get("spark.driver.port")) - registerAM(sc.getConf, rpcEnv, driverRef, sc.ui.map(_.webUrl), securityMgr) + registerAM(sc.getConf, rpcEnv, driverRef, sc.ui.map(_.webUrl)) registered = true } else { // Sanity check; should never happen in normal operation, since sc should only be null @@ -498,15 +508,14 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends } } - private def runExecutorLauncher(securityMgr: SecurityManager): Unit = { + private def runExecutorLauncher(): Unit = { val hostname = Utils.localHostName val amCores = sparkConf.get(AM_CORES) rpcEnv = RpcEnv.create("sparkYarnAM", hostname, hostname, -1, sparkConf, securityMgr, amCores, true) val driverRef = waitForSparkDriver() addAmIpFilter(Some(driverRef)) - registerAM(sparkConf, rpcEnv, driverRef, sparkConf.getOption("spark.driver.appUIAddress"), - securityMgr) + registerAM(sparkConf, rpcEnv, driverRef, sparkConf.getOption("spark.driver.appUIAddress")) registered = true // In client mode the actor will stop the reporter thread. 
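The ApplicationMaster change above mirrors every resolved SparkConf entry into JVM system properties. This works because a freshly constructed SparkConf (with default loading) picks up every "spark.*" system property, so both SparkHadoopUtil's default configuration and a user application's own SparkConf in cluster mode see the same settings. A minimal sketch of that mechanism, not part of the patch (assumes spark-core on the classpath; the property value is illustrative):

    import org.apache.spark.SparkConf

    object SysPropMirroringSketch {
      def main(args: Array[String]): Unit = {
        // Stands in for the AM's `sparkConf.getAll.foreach` loop above.
        sys.props("spark.app.name") = "am-propagated"
        // A new SparkConf loads all "spark.*" system properties by default...
        val conf = new SparkConf()
        // ...so the mirrored entry is visible to independently created confs.
        assert(conf.get("spark.app.name") == "am-propagated")
      }
    }
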
@@ -686,6 +695,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends if (args.primaryRFile != null && args.primaryRFile.endsWith(".R")) { // TODO(davies): add R dependencies here } + val mainMethod = userClassLoader.loadClass(args.userClass) .getMethod("main", classOf[Array[String]]) @@ -809,15 +819,6 @@ object ApplicationMaster extends Logging { def main(args: Array[String]): Unit = { SignalUtils.registerLogger(log) val amArgs = new ApplicationMasterArguments(args) - - // Load the properties file with the Spark configuration and set entries as system properties, - // so that user code run inside the AM also has access to them. - // Note: we must do this before SparkHadoopUtil instantiated - if (amArgs.propertiesFile != null) { - Utils.getPropertiesFromFile(amArgs.propertiesFile).foreach { case (k, v) => - sys.props(k) = v - } - } master = new ApplicationMaster(amArgs) System.exit(master.run()) } @@ -830,6 +831,16 @@ object ApplicationMaster extends Logging { master.getAttemptId } + private[spark] def getHistoryServerAddress( + sparkConf: SparkConf, + yarnConf: YarnConfiguration, + appId: String, + attemptId: String): String = { + sparkConf.get(HISTORY_SERVER_ADDRESS) + .map { text => SparkHadoopUtil.get.substituteHadoopVariables(text, yarnConf) } + .map { address => s"${address}${HistoryServer.UI_PATH_PREFIX}/${appId}/${attemptId}" } + .getOrElse("") + } } /** diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 99e7d46ca5c96..8cd3cd9746a3a 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -48,7 +48,7 @@ import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException import org.apache.hadoop.yarn.util.Records import org.apache.spark.{SecurityManager, SparkConf, SparkException} -import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.deploy.{SparkApplication, SparkHadoopUtil} import org.apache.spark.deploy.yarn.config._ import org.apache.spark.deploy.yarn.security.YARNHadoopDelegationTokenManager import org.apache.spark.internal.Logging @@ -58,18 +58,14 @@ import org.apache.spark.util.{CallerContext, Utils} private[spark] class Client( val args: ClientArguments, - val hadoopConf: Configuration, val sparkConf: SparkConf) extends Logging { import Client._ import YarnSparkHadoopUtil._ - def this(clientArgs: ClientArguments, spConf: SparkConf) = - this(clientArgs, SparkHadoopUtil.get.newConfiguration(spConf), spConf) - private val yarnClient = YarnClient.createYarnClient - private val yarnConf = new YarnConfiguration(hadoopConf) + private val hadoopConf = new YarnConfiguration(SparkHadoopUtil.newConfiguration(sparkConf)) private val isClusterMode = sparkConf.get("spark.submit.deployMode", "client") == "cluster" @@ -104,6 +100,8 @@ private[spark] class Client( private var amKeytabFileName: String = null private val launcherBackend = new LauncherBackend() { + override protected def conf: SparkConf = sparkConf + override def onStopRequest(): Unit = { if (isClusterMode && appId != null) { yarnClient.killApplication(appId) @@ -125,7 +123,7 @@ private[spark] class Client( private val credentialManager = new YARNHadoopDelegationTokenManager( sparkConf, hadoopConf, - conf => YarnSparkHadoopUtil.get.hadoopFSsToAccess(sparkConf, conf)) + conf => YarnSparkHadoopUtil.hadoopFSsToAccess(sparkConf, 
conf)) def reportLauncherState(state: SparkAppHandle.State): Unit = { launcherBackend.setState(state) @@ -134,8 +132,6 @@ private[spark] class Client( def stop(): Unit = { launcherBackend.close() yarnClient.stop() - // Unset YARN mode system env variable, to allow switching between cluster types. - System.clearProperty("SPARK_YARN_MODE") } /** @@ -152,7 +148,7 @@ private[spark] class Client( // Setup the credentials before doing anything else, // so we have don't have issues at any point. setupCredentials() - yarnClient.init(yarnConf) + yarnClient.init(hadoopConf) yarnClient.start() logInfo("Requesting a new application from cluster with %d NodeManagers" @@ -398,7 +394,7 @@ private[spark] class Client( if (SparkHadoopUtil.get.isProxyUser(currentUser)) { currentUser.addCredentials(credentials) } - logDebug(YarnSparkHadoopUtil.get.dumpTokens(credentials).mkString("\n")) + logDebug(SparkHadoopUtil.get.dumpTokens(credentials).mkString("\n")) } // If we use principal and keytab to login, also credentials can be renewed some time @@ -758,12 +754,14 @@ private[spark] class Client( // Save the YARN configuration into a separate file that will be overlayed on top of the // cluster's Hadoop conf. confStream.putNextEntry(new ZipEntry(SPARK_HADOOP_CONF_FILE)) - yarnConf.writeXml(confStream) + hadoopConf.writeXml(confStream) confStream.closeEntry() - // Save Spark configuration to a file in the archive. + // Save Spark configuration to a file in the archive, but filter out the app's secret. val props = new Properties() - sparkConf.getAll.foreach { case (k, v) => props.setProperty(k, v) } + sparkConf.getAll.foreach { case (k, v) => + props.setProperty(k, v) + } // Override spark.yarn.key to point to the location in distributed cache which will be used // by AM. Option(amKeytabFileName).foreach { k => props.setProperty(KEYTAB.key, k) } @@ -786,8 +784,7 @@ private[spark] class Client( pySparkArchives: Seq[String]): HashMap[String, String] = { logInfo("Setting up the launch environment for our AM container") val env = new HashMap[String, String]() - populateClasspath(args, yarnConf, sparkConf, env, sparkConf.get(DRIVER_CLASS_PATH)) - env("SPARK_YARN_MODE") = "true" + populateClasspath(args, hadoopConf, sparkConf, env, sparkConf.get(DRIVER_CLASS_PATH)) env("SPARK_YARN_STAGING_DIR") = stagingDirPath.toString env("SPARK_USER") = UserGroupInformation.getCurrentUser().getShortUserName() if (loginFromKeytab) { @@ -861,6 +858,7 @@ private[spark] class Client( } else { Nil } + val launchEnv = setupLaunchEnv(appStagingDirPath, pySparkArchives) val localResources = prepareLocalResources(appStagingDirPath, pySparkArchives) @@ -991,7 +989,11 @@ private[spark] class Client( logDebug("YARN AM launch context:") logDebug(s" user class: ${Option(args.userClass).getOrElse("N/A")}") logDebug(" env:") - launchEnv.foreach { case (k, v) => logDebug(s" $k -> $v") } + if (log.isDebugEnabled) { + Utils.redact(sparkConf, launchEnv.toSeq).foreach { case (k, v) => + logDebug(s" $k -> $v") + } + } logDebug(" resources:") localResources.foreach { case (k, v) => logDebug(s" $k -> $v")} logDebug(" command:") @@ -1185,24 +1187,6 @@ private[spark] class Client( private object Client extends Logging { - def main(argStrings: Array[String]) { - if (!sys.props.contains("SPARK_SUBMIT")) { - logWarning("WARNING: This client is deprecated and will be removed in a " + - "future version of Spark. Use ./bin/spark-submit with \"--master yarn\"") - } - - // Set an env variable indicating we are running in YARN mode. 
- // Note that any env variable with the SPARK_ prefix gets propagated to all (remote) processes - System.setProperty("SPARK_YARN_MODE", "true") - val sparkConf = new SparkConf - // SparkSubmit would use yarn cache to distribute files & jars in yarn mode, - // so remove them from sparkConf here for yarn mode. - sparkConf.remove("spark.jars") - sparkConf.remove("spark.files") - val args = new ClientArguments(argStrings) - new Client(args, sparkConf).run() - } - // Alias for the user jar val APP_JAR_NAME: String = "__app__.jar" @@ -1437,15 +1421,20 @@ } /** - * Return whether the two file systems are the same. + * Return whether the two URIs represent the same file system. */ - private def compareFs(srcFs: FileSystem, destFs: FileSystem): Boolean = { - val srcUri = srcFs.getUri() - val dstUri = destFs.getUri() + private[spark] def compareUri(srcUri: URI, dstUri: URI): Boolean = { + if (srcUri.getScheme() == null || srcUri.getScheme() != dstUri.getScheme()) { return false } + val srcAuthority = srcUri.getAuthority() + val dstAuthority = dstUri.getAuthority() + if (srcAuthority != null && !srcAuthority.equalsIgnoreCase(dstAuthority)) { + return false + } + var srcHost = srcUri.getHost() var dstHost = dstUri.getHost() @@ -1463,6 +1452,17 @@ } Objects.equal(srcHost, dstHost) && srcUri.getPort() == dstUri.getPort() + + } + + /** + * Return whether the two file systems are the same. + */ + protected def compareFs(srcFs: FileSystem, destFs: FileSystem): Boolean = { + val srcUri = srcFs.getUri() + val dstUri = destFs.getUri() + + compareUri(srcUri, dstUri) } /** @@ -1506,3 +1506,16 @@ } } + +private[spark] class YarnClusterApplication extends SparkApplication { + + override def start(args: Array[String], conf: SparkConf): Unit = { + // SparkSubmit uses the YARN cache to distribute files and jars in YARN mode, + // so remove them from the conf here.
+ conf.remove("spark.jars") + conf.remove("spark.files") + + new Client(new ClientArguments(args), conf).run() + } + +} diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index 7052fb347106b..506adb363aa90 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -41,6 +41,7 @@ import org.apache.spark.rpc.{RpcCallContext, RpcEndpointRef} import org.apache.spark.scheduler.{ExecutorExited, ExecutorLossReason} import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages.RemoveExecutor import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages.RetrieveLastAllocatedExecutorId +import org.apache.spark.scheduler.cluster.SchedulerBackendUtils import org.apache.spark.util.{Clock, SystemClock, ThreadUtils} /** @@ -109,7 +110,7 @@ private[yarn] class YarnAllocator( sparkConf.get(EXECUTOR_ATTEMPT_FAILURE_VALIDITY_INTERVAL_MS).getOrElse(-1L) @volatile private var targetNumExecutors = - YarnSparkHadoopUtil.getInitialTargetExecutorNumber(sparkConf) + SchedulerBackendUtils.getInitialTargetExecutorNumber(sparkConf) private var currentNodeBlacklist = Set.empty[String] diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala index 72f4d273ab53b..c1ae12aabb8cc 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala @@ -92,7 +92,7 @@ private[spark] class YarnRMClient extends Logging { /** Returns the attempt ID. */ def getAttemptId(): ApplicationAttemptId = { - YarnSparkHadoopUtil.get.getContainerId.getApplicationAttemptId() + YarnSparkHadoopUtil.getContainerId.getApplicationAttemptId() } /** Returns the configuration for the AmIpFilter to add to the Spark UI. 
*/ diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala index 3d9f99f57bed7..f406fabd61860 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala @@ -17,21 +17,14 @@ package org.apache.spark.deploy.yarn -import java.nio.charset.StandardCharsets.UTF_8 -import java.util.regex.Matcher -import java.util.regex.Pattern +import java.util.regex.{Matcher, Pattern} import scala.collection.mutable.{HashMap, ListBuffer} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} -import org.apache.hadoop.io.Text -import org.apache.hadoop.mapred.{JobConf, Master} -import org.apache.hadoop.security.Credentials -import org.apache.hadoop.security.UserGroupInformation import org.apache.hadoop.yarn.api.ApplicationConstants import org.apache.hadoop.yarn.api.records.{ApplicationAccessType, ContainerId, Priority} -import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.util.ConverterUtils import org.apache.spark.{SecurityManager, SparkConf, SparkException} @@ -43,87 +36,10 @@ import org.apache.spark.internal.config._ import org.apache.spark.launcher.YarnCommandBuilderUtils import org.apache.spark.util.Utils - -/** - * Contains util methods to interact with Hadoop from spark. - */ -class YarnSparkHadoopUtil extends SparkHadoopUtil { +object YarnSparkHadoopUtil { private var credentialUpdater: CredentialUpdater = _ - override def transferCredentials(source: UserGroupInformation, dest: UserGroupInformation) { - dest.addCredentials(source.getCredentials()) - } - - // Note that all params which start with SPARK are propagated all the way through, so if in yarn - // mode, this MUST be set to true. - override def isYarnMode(): Boolean = { true } - - // Return an appropriate (subclass) of Configuration. Creating a config initializes some Hadoop - // subsystems. Always create a new config, don't reuse yarnConf. 
- override def newConfiguration(conf: SparkConf): Configuration = { - val hadoopConf = new YarnConfiguration(super.newConfiguration(conf)) - hadoopConf.addResource(Client.SPARK_HADOOP_CONF_FILE) - hadoopConf - } - - // Add any user credentials to the job conf which are necessary for running on a secure Hadoop - // cluster - override def addCredentials(conf: JobConf) { - val jobCreds = conf.getCredentials() - jobCreds.mergeAll(UserGroupInformation.getCurrentUser().getCredentials()) - } - - override def addSecretKeyToUserCredentials(key: String, secret: String) { - val creds = new Credentials() - creds.addSecretKey(new Text(key), secret.getBytes(UTF_8)) - addCurrentUserCredentials(creds) - } - - override def getSecretKeyFromUserCredentials(key: String): Array[Byte] = { - val credentials = getCurrentUserCredentials() - if (credentials != null) credentials.getSecretKey(new Text(key)) else null - } - - private[spark] override def startCredentialUpdater(sparkConf: SparkConf): Unit = { - val hadoopConf = newConfiguration(sparkConf) - val credentialManager = new YARNHadoopDelegationTokenManager( - sparkConf, - hadoopConf, - conf => YarnSparkHadoopUtil.get.hadoopFSsToAccess(sparkConf, conf)) - credentialUpdater = new CredentialUpdater(sparkConf, hadoopConf, credentialManager) - credentialUpdater.start() - } - - private[spark] override def stopCredentialUpdater(): Unit = { - if (credentialUpdater != null) { - credentialUpdater.stop() - credentialUpdater = null - } - } - - private[spark] def getContainerId: ContainerId = { - val containerIdString = System.getenv(ApplicationConstants.Environment.CONTAINER_ID.name()) - ConverterUtils.toContainerId(containerIdString) - } - - /** The filesystems for which YARN should fetch delegation tokens. */ - private[spark] def hadoopFSsToAccess( - sparkConf: SparkConf, - hadoopConf: Configuration): Set[FileSystem] = { - val filesystemsToAccess = sparkConf.get(FILESYSTEMS_TO_ACCESS) - .map(new Path(_).getFileSystem(hadoopConf)) - .toSet - - val stagingFS = sparkConf.get(STAGING_DIR) - .map(new Path(_).getFileSystem(hadoopConf)) - .getOrElse(FileSystem.get(hadoopConf)) - - filesystemsToAccess + stagingFS - } -} - -object YarnSparkHadoopUtil { // Additional memory overhead // 10% was arrived at experimentally. In the interest of minimizing memory waste while covering // the common cases. Memory overhead tends to grow with container size. @@ -133,20 +49,10 @@ object YarnSparkHadoopUtil { val ANY_HOST = "*" - val DEFAULT_NUMBER_EXECUTORS = 2 - // All RM requests are issued with same priority : we do not (yet) have any distinction between // request types (like map/reduce in hadoop for example) val RM_REQUEST_PRIORITY = Priority.newInstance(1) - def get: YarnSparkHadoopUtil = { - val yarnMode = java.lang.Boolean.parseBoolean( - System.getProperty("SPARK_YARN_MODE", System.getenv("SPARK_YARN_MODE"))) - if (!yarnMode) { - throw new SparkException("YarnSparkHadoopUtil is not available in non-YARN mode!") - } - SparkHadoopUtil.get.asInstanceOf[YarnSparkHadoopUtil] - } /** * Add a path variable to the given environment map. * If the map already contains this key, append the value to the existing value instead. @@ -280,26 +186,41 @@ object YarnSparkHadoopUtil { ) } - /** - * Getting the initial target number of executors depends on whether dynamic allocation is - * enabled. - * If not using dynamic allocation it gets the number of executors requested by the user. 
- */ - def getInitialTargetExecutorNumber( - conf: SparkConf, - numExecutors: Int = DEFAULT_NUMBER_EXECUTORS): Int = { - if (Utils.isDynamicAllocationEnabled(conf)) { - val minNumExecutors = conf.get(DYN_ALLOCATION_MIN_EXECUTORS) - val initialNumExecutors = Utils.getDynamicAllocationInitialExecutors(conf) - val maxNumExecutors = conf.get(DYN_ALLOCATION_MAX_EXECUTORS) - require(initialNumExecutors >= minNumExecutors && initialNumExecutors <= maxNumExecutors, - s"initial executor number $initialNumExecutors must between min executor number " + - s"$minNumExecutors and max executor number $maxNumExecutors") + def getContainerId: ContainerId = { + val containerIdString = System.getenv(ApplicationConstants.Environment.CONTAINER_ID.name()) + ConverterUtils.toContainerId(containerIdString) + } - initialNumExecutors - } else { - conf.get(EXECUTOR_INSTANCES).getOrElse(numExecutors) + /** The filesystems for which YARN should fetch delegation tokens. */ + def hadoopFSsToAccess( + sparkConf: SparkConf, + hadoopConf: Configuration): Set[FileSystem] = { + val filesystemsToAccess = sparkConf.get(FILESYSTEMS_TO_ACCESS) + .map(new Path(_).getFileSystem(hadoopConf)) + .toSet + + val stagingFS = sparkConf.get(STAGING_DIR) + .map(new Path(_).getFileSystem(hadoopConf)) + .getOrElse(FileSystem.get(hadoopConf)) + + filesystemsToAccess + stagingFS + } + + def startCredentialUpdater(sparkConf: SparkConf): Unit = { + val hadoopConf = SparkHadoopUtil.get.newConfiguration(sparkConf) + val credentialManager = new YARNHadoopDelegationTokenManager( + sparkConf, + hadoopConf, + conf => YarnSparkHadoopUtil.hadoopFSsToAccess(sparkConf, conf)) + credentialUpdater = new CredentialUpdater(sparkConf, hadoopConf, credentialManager) + credentialUpdater.start() + } + + def stopCredentialUpdater(): Unit = { + if (credentialUpdater != null) { + credentialUpdater.stop() + credentialUpdater = null } } -} +} diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala index e1af8ba087d6e..3ba3ae5ab4401 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala @@ -217,20 +217,12 @@ package object config { .intConf .createWithDefault(1) - private[spark] val DRIVER_MEMORY_OVERHEAD = ConfigBuilder("spark.yarn.driver.memoryOverhead") - .bytesConf(ByteUnit.MiB) - .createOptional - /* Executor configuration. 
*/ private[spark] val EXECUTOR_CORES = ConfigBuilder("spark.executor.cores") .intConf .createWithDefault(1) - private[spark] val EXECUTOR_MEMORY_OVERHEAD = ConfigBuilder("spark.yarn.executor.memoryOverhead") - .bytesConf(ByteUnit.MiB) - .createOptional - private[spark] val EXECUTOR_NODE_LABEL_EXPRESSION = ConfigBuilder("spark.yarn.executor.nodeLabelExpression") .doc("Node label expression for executors.") diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/AMCredentialRenewer.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/AMCredentialRenewer.scala index 6134757a82fdc..eaf2cff111a49 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/AMCredentialRenewer.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/AMCredentialRenewer.scala @@ -62,7 +62,7 @@ private[yarn] class AMCredentialRenewer( private val credentialRenewerThread: ScheduledExecutorService = ThreadUtils.newDaemonSingleThreadScheduledExecutor("Credential Refresh Thread") - private val hadoopUtil = YarnSparkHadoopUtil.get + private val hadoopUtil = SparkHadoopUtil.get private val credentialsFile = sparkConf.get(CREDENTIALS_FILE_PATH) private val daysToKeepFiles = sparkConf.get(CREDENTIALS_FILE_MAX_RETENTION) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala index d482376d14dd7..0c6206eebe41d 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala @@ -52,7 +52,7 @@ private[spark] class YarnClientSchedulerBackend( logDebug("ClientArguments called with: " + argsArrayBuf.mkString(" ")) val args = new ClientArguments(argsArrayBuf.toArray) - totalExpectedExecutors = YarnSparkHadoopUtil.getInitialTargetExecutorNumber(conf) + totalExpectedExecutors = SchedulerBackendUtils.getInitialTargetExecutorNumber(conf) client = new Client(args, conf) bindToYarn(client.submitApplication(), None) @@ -66,7 +66,7 @@ private[spark] class YarnClientSchedulerBackend( // reads the credentials from HDFS, just like the executors and updates its own credentials // cache. 
if (conf.contains("spark.yarn.credentials.file")) { - YarnSparkHadoopUtil.get.startCredentialUpdater(conf) + YarnSparkHadoopUtil.startCredentialUpdater(conf) } monitorThread = asyncMonitorApplication() monitorThread.start() @@ -153,7 +153,7 @@ private[spark] class YarnClientSchedulerBackend( client.reportLauncherState(SparkAppHandle.State.FINISHED) super.stop() - YarnSparkHadoopUtil.get.stopCredentialUpdater() + YarnSparkHadoopUtil.stopCredentialUpdater() client.stop() logInfo("Stopped") } diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterSchedulerBackend.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterSchedulerBackend.scala index 4f3d5ebf403e0..62bf9818ee248 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterSchedulerBackend.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterSchedulerBackend.scala @@ -34,14 +34,14 @@ private[spark] class YarnClusterSchedulerBackend( val attemptId = ApplicationMaster.getAttemptId bindToYarn(attemptId.getApplicationId(), Some(attemptId)) super.start() - totalExpectedExecutors = YarnSparkHadoopUtil.getInitialTargetExecutorNumber(sc.conf) + totalExpectedExecutors = SchedulerBackendUtils.getInitialTargetExecutorNumber(sc.conf) } override def getDriverLogUrls: Option[Map[String, String]] = { var driverLogs: Option[Map[String, String]] = None try { val yarnConf = new YarnConfiguration(sc.hadoopConfiguration) - val containerId = YarnSparkHadoopUtil.get.getContainerId + val containerId = YarnSparkHadoopUtil.getContainerId val httpAddress = System.getenv(Environment.NM_HOST.name()) + ":" + System.getenv(Environment.NM_HTTP_PORT.name()) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala index 415a29fd887e8..bb615c36cd97f 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala @@ -20,6 +20,7 @@ package org.apache.spark.scheduler.cluster import java.util.concurrent.atomic.{AtomicBoolean} import scala.concurrent.{ExecutionContext, Future} +import scala.concurrent.ExecutionContext.Implicits.global import scala.util.{Failure, Success} import scala.util.control.NonFatal @@ -245,14 +246,7 @@ private[spark] abstract class YarnSchedulerBackend( Future.successful(RemoveExecutor(executorId, SlaveLost("AM is not yet registered."))) } - removeExecutorMessage - .flatMap { message => - driverEndpoint.ask[Boolean](message) - }(ThreadUtils.sameThread) - .onFailure { - case NonFatal(e) => logError( - s"Error requesting driver to remove executor $executorId after disconnection.", e) - }(ThreadUtils.sameThread) + removeExecutorMessage.foreach { message => driverEndpoint.send(message) } } override def receive: PartialFunction[Any, Unit] = { @@ -265,12 +259,10 @@ private[spark] abstract class YarnSchedulerBackend( addWebUIFilter(filterName, filterParams, proxyBase) case r @ RemoveExecutor(executorId, reason) => - logWarning(reason.toString) - driverEndpoint.ask[Boolean](r).onFailure { - case e => - logError("Error requesting driver to remove executor" + - s" $executorId for reason $reason", e) - }(ThreadUtils.sameThread) + if (!stopped.get) { + logWarning(s"Requesting driver to remove 
executor $executorId for reason $reason") + driverEndpoint.send(r) + } } diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ApplicationMasterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ApplicationMasterSuite.scala new file mode 100644 index 0000000000000..695a82f3583e6 --- /dev/null +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ApplicationMasterSuite.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.yarn + +import org.apache.hadoop.yarn.conf.YarnConfiguration + +import org.apache.spark.{SparkConf, SparkFunSuite} + +class ApplicationMasterSuite extends SparkFunSuite { + + test("history url with hadoop and spark substitutions") { + val host = "rm.host.com" + val port = 18080 + val sparkConf = new SparkConf() + + sparkConf.set("spark.yarn.historyServer.address", + "http://${hadoopconf-yarn.resourcemanager.hostname}:${spark.history.ui.port}") + val yarnConf = new YarnConfiguration() + yarnConf.set("yarn.resourcemanager.hostname", host) + val appId = "application_123_1" + val attemptId = appId + "_1" + + val shsAddr = ApplicationMaster + .getHistoryServerAddress(sparkConf, yarnConf, appId, attemptId) + + assert(shsAddr === s"http://${host}:${port}/history/${appId}/${attemptId}") + } +} diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala index 7cc3075eb766c..dc38e34bf5591 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala @@ -62,18 +62,14 @@ abstract class BaseYarnClusterSuite protected var hadoopConfDir: File = _ private var logConfDir: File = _ - var oldSystemProperties: Properties = null - def newYarnConfig(): YarnConfiguration override def beforeAll() { super.beforeAll() - oldSystemProperties = SerializationUtils.clone(System.getProperties) tempDir = Utils.createTempDir() logConfDir = new File(tempDir, "log4j") logConfDir.mkdir() - System.setProperty("SPARK_YARN_MODE", "true") val logConfFile = new File(logConfDir, "log4j.properties") Files.write(LOG4J_CONF, logConfFile, StandardCharsets.UTF_8) @@ -124,7 +120,6 @@ abstract class BaseYarnClusterSuite try { yarnCluster.stop() } finally { - System.setProperties(oldSystemProperties) super.afterAll() } } diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala index 6cf68427921fd..7fa597167f3f0 100644 --- 
a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala @@ -24,7 +24,6 @@ import java.util.Properties import scala.collection.JavaConverters._ import scala.collection.mutable.{HashMap => MutableHashMap} -import org.apache.commons.lang3.SerializationUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.MRJobConfig @@ -36,34 +35,18 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.util.Records import org.mockito.Matchers.{eq => meq, _} import org.mockito.Mockito._ -import org.scalatest.{BeforeAndAfterAll, Matchers} +import org.scalatest.Matchers import org.apache.spark.{SparkConf, SparkFunSuite, TestUtils} import org.apache.spark.deploy.yarn.config._ -import org.apache.spark.util.{ResetSystemProperties, SparkConfWithEnv, Utils} +import org.apache.spark.util.{SparkConfWithEnv, Utils} -class ClientSuite extends SparkFunSuite with Matchers with BeforeAndAfterAll - with ResetSystemProperties { +class ClientSuite extends SparkFunSuite with Matchers { import Client._ var oldSystemProperties: Properties = null - override def beforeAll(): Unit = { - super.beforeAll() - oldSystemProperties = SerializationUtils.clone(System.getProperties) - System.setProperty("SPARK_YARN_MODE", "true") - } - - override def afterAll(): Unit = { - try { - System.setProperties(oldSystemProperties) - oldSystemProperties = null - } finally { - super.afterAll() - } - } - test("default Yarn application classpath") { getDefaultYarnApplicationClasspath should be(Fixtures.knownDefYarnAppCP) } @@ -185,7 +168,6 @@ class ClientSuite extends SparkFunSuite with Matchers with BeforeAndAfterAll } test("configuration and args propagate through createApplicationSubmissionContext") { - val conf = new Configuration() // When parsing tags, duplicates and leading/trailing whitespace should be removed. // Spaces between non-comma strings should be preserved as single tags. Empty strings may or // may not be removed depending on the version of Hadoop being used. 
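The new ClientSuite cases below exercise Client.compareUri, added earlier in this patch. As a self-contained restatement of its matching rules (a sketch, not the patch's code; it omits the host-canonicalization step the real method applies before comparing hosts):

    import java.net.URI

    object CompareUriSketch {
      // Two URIs name the same file system when the schemes match, the
      // authorities match case-insensitively, and host/port are equal.
      def sameFileSystem(src: URI, dst: URI): Boolean = {
        if (src.getScheme == null || src.getScheme != dst.getScheme) return false
        val srcAuth = src.getAuthority
        if (srcAuth != null && !srcAuth.equalsIgnoreCase(dst.getAuthority)) return false
        src.getHost == dst.getHost && src.getPort == dst.getPort
      }

      def main(args: Array[String]): Unit = {
        // Same scheme, no authority: paths are irrelevant, so this matches.
        assert(sameFileSystem(new URI("file:///file1"), new URI("file:///file2")))
        // Differing ports show up in the authority, so this does not match.
        assert(!sameFileSystem(new URI("hdfs://namenode1/path1"), new URI("hdfs://namenode1:8080/path2")))
      }
    }
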
@@ -200,7 +182,7 @@ class ClientSuite extends SparkFunSuite with Matchers with BeforeAndAfterAll val getNewApplicationResponse = Records.newRecord(classOf[GetNewApplicationResponse]) val containerLaunchContext = Records.newRecord(classOf[ContainerLaunchContext]) - val client = new Client(args, conf, sparkConf) + val client = new Client(args, sparkConf) client.createApplicationSubmissionContext( new YarnClientApplication(getNewApplicationResponse, appContext), containerLaunchContext) @@ -375,6 +357,39 @@ class ClientSuite extends SparkFunSuite with Matchers with BeforeAndAfterAll sparkConf.get(SECONDARY_JARS) should be (Some(Seq(new File(jar2.toURI).getName))) } + private val matching = Seq( + ("files URI match test1", "file:///file1", "file:///file2"), + ("files URI match test2", "file:///c:file1", "file://c:file2"), + ("files URI match test3", "file://host/file1", "file://host/file2"), + ("wasb URI match test", "wasb://bucket1@user", "wasb://bucket1@user/"), + ("hdfs URI match test", "hdfs:/path1", "hdfs:/path1") + ) + + matching.foreach { t => + test(t._1) { + assert(Client.compareUri(new URI(t._2), new URI(t._3)), + s"No match between ${t._2} and ${t._3}") + } + } + + private val unmatching = Seq( + ("files URI unmatch test1", "file:///file1", "file://host/file2"), + ("files URI unmatch test2", "file://host/file1", "file:///file2"), + ("files URI unmatch test3", "file://host/file1", "file://host2/file2"), + ("wasb URI unmatch test1", "wasb://bucket1@user", "wasb://bucket2@user/"), + ("wasb URI unmatch test2", "wasb://bucket1@user", "wasb://bucket1@user2/"), + ("s3 URI unmatch test", "s3a://user@pass:bucket1/", "s3a://user2@pass2:bucket1/"), + ("hdfs URI unmatch test1", "hdfs://namenode1/path1", "hdfs://namenode1:8080/path2"), + ("hdfs URI unmatch test2", "hdfs://namenode1:8020/path1", "hdfs://namenode1:8080/path2") + ) + + unmatching.foreach { t => + test(t._1) { + assert(!Client.compareUri(new URI(t._2), new URI(t._3)), + s"match between ${t._2} and ${t._3}") + } + } + object Fixtures { val knownDefYarnAppCP: Seq[String] = @@ -407,15 +422,14 @@ class ClientSuite extends SparkFunSuite with Matchers with BeforeAndAfterAll private def createClient( sparkConf: SparkConf, - conf: Configuration = new Configuration(), args: Array[String] = Array()): Client = { val clientArgs = new ClientArguments(args) - spy(new Client(clientArgs, conf, sparkConf)) + spy(new Client(clientArgs, sparkConf)) } private def classpath(client: Client): Array[String] = { val env = new MutableHashMap[String, String]() - populateClasspath(null, client.hadoopConf, client.sparkConf, env) + populateClasspath(null, new Configuration(), client.sparkConf, env) classpath(env) } diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala index 080a20cdf87e4..f941f81f32013 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala @@ -132,7 +132,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite { "spark.executor.cores" -> "1", "spark.executor.memory" -> "512m", "spark.executor.instances" -> "2", - // Sending some senstive information, which we'll make sure gets redacted + // Sending some sensitive information, which we'll make sure gets redacted "spark.executorEnv.HADOOP_CREDSTORE_PASSWORD" -> YarnClusterDriver.SECRET_PASSWORD, 
"spark.yarn.appMasterEnv.HADOOP_CREDSTORE_PASSWORD" -> YarnClusterDriver.SECRET_PASSWORD )) @@ -152,8 +152,13 @@ class YarnClusterSuite extends BaseYarnClusterSuite { )) } - test("run Spark in yarn-cluster mode with using SparkHadoopUtil.conf") { - testYarnAppUseSparkHadoopUtilConf() + test("yarn-cluster should respect conf overrides in SparkHadoopUtil (SPARK-16414)") { + val result = File.createTempFile("result", null, tempDir) + val finalState = runSpark(false, + mainClassName(YarnClusterDriverUseSparkHadoopUtilConf.getClass), + appArgs = Seq("key=value", result.getAbsolutePath()), + extraConf = Map("spark.hadoop.key" -> "value")) + checkResult(finalState, result) } test("run Spark in yarn-client mode with additional jar") { @@ -261,15 +266,6 @@ class YarnClusterSuite extends BaseYarnClusterSuite { checkResult(finalState, result) } - private def testYarnAppUseSparkHadoopUtilConf(): Unit = { - val result = File.createTempFile("result", null, tempDir) - val finalState = runSpark(false, - mainClassName(YarnClusterDriverUseSparkHadoopUtilConf.getClass), - appArgs = Seq("key=value", result.getAbsolutePath()), - extraConf = Map("spark.hadoop.key" -> "value")) - checkResult(finalState, result) - } - private def testWithAddJar(clientMode: Boolean): Unit = { val originalJar = TestUtils.createJarWithFiles(Map("test.resource" -> "ORIGINAL"), tempDir) val driverResult = File.createTempFile("driver", null, tempDir) @@ -518,7 +514,7 @@ private object YarnClusterDriver extends Logging with Matchers { s"Driver logs contain sensitive info (${SECRET_PASSWORD}): \n${log} " ) } - val containerId = YarnSparkHadoopUtil.get.getContainerId + val containerId = YarnSparkHadoopUtil.getContainerId val user = Utils.getCurrentUserName() assert(urlStr.endsWith(s"/node/containerlogs/$containerId/$user/stderr?start=-4096")) } diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala index a057618b39950..f21353aa007c8 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala @@ -71,14 +71,10 @@ class YarnSparkHadoopUtilSuite extends SparkFunSuite with Matchers with Logging test("Yarn configuration override") { val key = "yarn.nodemanager.hostname" - val default = new YarnConfiguration() - val sparkConf = new SparkConf() .set("spark.hadoop." + key, "someHostName") - val yarnConf = new YarnSparkHadoopUtil().newConfiguration(sparkConf) - - yarnConf.getClass() should be (classOf[YarnConfiguration]) - yarnConf.get(key) should not be default.get(key) + val yarnConf = new YarnConfiguration(SparkHadoopUtil.get.newConfiguration(sparkConf)) + yarnConf.get(key) should be ("someHostName") } @@ -145,45 +141,4 @@ class YarnSparkHadoopUtilSuite extends SparkFunSuite with Matchers with Logging } - test("check different hadoop utils based on env variable") { - try { - System.setProperty("SPARK_YARN_MODE", "true") - assert(SparkHadoopUtil.get.getClass === classOf[YarnSparkHadoopUtil]) - System.setProperty("SPARK_YARN_MODE", "false") - assert(SparkHadoopUtil.get.getClass === classOf[SparkHadoopUtil]) - } finally { - System.clearProperty("SPARK_YARN_MODE") - } - } - - - - // This test needs to live here because it depends on isYarnMode returning true, which can only - // happen in the YARN module. 
- test("security manager token generation") { - try { - System.setProperty("SPARK_YARN_MODE", "true") - val initial = SparkHadoopUtil.get - .getSecretKeyFromUserCredentials(SecurityManager.SECRET_LOOKUP_KEY) - assert(initial === null || initial.length === 0) - - val conf = new SparkConf() - .set(SecurityManager.SPARK_AUTH_CONF, "true") - .set(SecurityManager.SPARK_AUTH_SECRET_CONF, "unused") - val sm = new SecurityManager(conf) - - val generated = SparkHadoopUtil.get - .getSecretKeyFromUserCredentials(SecurityManager.SECRET_LOOKUP_KEY) - assert(generated != null) - val genString = new Text(generated).toString() - assert(genString != "unused") - assert(sm.getSecretKey() === genString) - } finally { - // removeSecretKey() was only added in Hadoop 2.6, so instead we just set the secret - // to an empty string. - SparkHadoopUtil.get.addSecretKeyToUserCredentials(SecurityManager.SECRET_LOOKUP_KEY, "") - System.clearProperty("SPARK_YARN_MODE") - } - } - } diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/security/YARNHadoopDelegationTokenManagerSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/security/YARNHadoopDelegationTokenManagerSuite.scala index c918998bde07c..3c7cdc0f1dab8 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/security/YARNHadoopDelegationTokenManagerSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/security/YARNHadoopDelegationTokenManagerSuite.scala @@ -31,24 +31,15 @@ class YARNHadoopDelegationTokenManagerSuite extends SparkFunSuite with Matchers override def beforeAll(): Unit = { super.beforeAll() - - System.setProperty("SPARK_YARN_MODE", "true") - sparkConf = new SparkConf() hadoopConf = new Configuration() } - override def afterAll(): Unit = { - super.afterAll() - - System.clearProperty("SPARK_YARN_MODE") - } - test("Correctly loads credential providers") { credentialManager = new YARNHadoopDelegationTokenManager( sparkConf, hadoopConf, - conf => YarnSparkHadoopUtil.get.hadoopFSsToAccess(sparkConf, conf)) + conf => YarnSparkHadoopUtil.hadoopFSsToAccess(sparkConf, conf)) credentialManager.credentialProviders.get("yarn-test") should not be (None) } diff --git a/scalastyle-config.xml b/scalastyle-config.xml index 7bdd3fac773a3..e2fa5754afaee 100644 --- a/scalastyle-config.xml +++ b/scalastyle-config.xml @@ -93,7 +93,7 @@ This file is divided into 3 sections: - + diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 9e2ced30407d4..7d23637e28342 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.3.0-SNAPSHOT + 2.4.0-SNAPSHOT ../../pom.xml @@ -134,7 +134,7 @@ org.scalatest scalatest-maven-plugin - -ea -Xmx4g -Xss4m -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m + -ea -Xmx4g -Xss4m -XX:ReservedCodeCacheSize=${CodeCacheSize} diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 6fe995f650d55..39d5e4ed56628 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -73,18 +73,22 @@ statement | ALTER DATABASE identifier SET DBPROPERTIES tablePropertyList #setDatabaseProperties | DROP DATABASE (IF EXISTS)? identifier (RESTRICT | CASCADE)? #dropDatabase | createTableHeader ('(' colTypeList ')')? 
tableProvider - (OPTIONS options=tablePropertyList)? - (PARTITIONED BY partitionColumnNames=identifierList)? - bucketSpec? locationSpec? - (COMMENT comment=STRING)? - (TBLPROPERTIES tableProps=tablePropertyList)? + ((OPTIONS options=tablePropertyList) | + (PARTITIONED BY partitionColumnNames=identifierList) | + bucketSpec | + locationSpec | + (COMMENT comment=STRING) | + (TBLPROPERTIES tableProps=tablePropertyList))* (AS? query)? #createTable | createTableHeader ('(' columns=colTypeList ')')? - (COMMENT comment=STRING)? - (PARTITIONED BY '(' partitionColumns=colTypeList ')')? - bucketSpec? skewSpec? - rowFormat? createFileFormat? locationSpec? - (TBLPROPERTIES tablePropertyList)? + ((COMMENT comment=STRING) | + (PARTITIONED BY '(' partitionColumns=colTypeList ')') | + bucketSpec | + skewSpec | + rowFormat | + createFileFormat | + locationSpec | + (TBLPROPERTIES tableProps=tablePropertyList))* (AS? query)? #createHiveTable | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier LIKE source=tableIdentifier locationSpec? #createTableLike @@ -137,7 +141,7 @@ statement (LIKE? pattern=STRING)? #showTables | SHOW TABLE EXTENDED ((FROM | IN) db=identifier)? LIKE pattern=STRING partitionSpec? #showTable - | SHOW DATABASES (LIKE pattern=STRING)? #showDatabases + | SHOW DATABASES (LIKE? pattern=STRING)? #showDatabases | SHOW TBLPROPERTIES table=tableIdentifier ('(' key=tablePropertyKey ')')? #showTblProperties | SHOW COLUMNS (FROM | IN) tableIdentifier diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java index 64ab01ca57403..d18542b188f71 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java @@ -294,7 +294,7 @@ public void setNullAt(int ordinal) { assertIndexIsValid(ordinal); BitSetMethods.set(baseObject, baseOffset + 8, ordinal); - /* we assume the corrresponding column was already 0 or + /* we assume the corresponding column was already 0 or will be set to 0 later by the caller side */ } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UTF8StringBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UTF8StringBuilder.java new file mode 100644 index 0000000000000..f0f66bae245fd --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UTF8StringBuilder.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.expressions.codegen; + +import org.apache.spark.unsafe.Platform; +import org.apache.spark.unsafe.array.ByteArrayMethods; +import org.apache.spark.unsafe.types.UTF8String; + +/** + * A helper class to write {@link UTF8String}s to an internal buffer and build the concatenated + * {@link UTF8String} at the end. + */ +public class UTF8StringBuilder { + + private static final int ARRAY_MAX = ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH; + + private byte[] buffer; + private int cursor = Platform.BYTE_ARRAY_OFFSET; + + public UTF8StringBuilder() { + // Since initial buffer size is 16 in `StringBuilder`, we set the same size here + this.buffer = new byte[16]; + } + + // Grows the buffer by at least `neededSize` + private void grow(int neededSize) { + if (neededSize > ARRAY_MAX - totalSize()) { + throw new UnsupportedOperationException( + "Cannot grow internal buffer by size " + neededSize + " because the size after growing " + + "exceeds size limitation " + ARRAY_MAX); + } + final int length = totalSize() + neededSize; + if (buffer.length < length) { + int newLength = length < ARRAY_MAX / 2 ? length * 2 : ARRAY_MAX; + final byte[] tmp = new byte[newLength]; + Platform.copyMemory( + buffer, + Platform.BYTE_ARRAY_OFFSET, + tmp, + Platform.BYTE_ARRAY_OFFSET, + totalSize()); + buffer = tmp; + } + } + + private int totalSize() { + return cursor - Platform.BYTE_ARRAY_OFFSET; + } + + public void append(UTF8String value) { + grow(value.numBytes()); + value.writeToMemory(buffer, cursor); + cursor += value.numBytes(); + } + + public void append(String value) { + append(UTF8String.fromString(value)); + } + + public UTF8String build() { + return UTF8String.fromBytes(buffer, 0, totalSize()); + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index 65040f1af4b04..9a4bf0075a178 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -63,6 +63,7 @@ object ScalaReflection extends ScalaReflection { private def dataTypeFor(tpe: `Type`): DataType = cleanUpReflectionObjects { tpe.dealias match { + case t if t <:< definitions.NullTpe => NullType case t if t <:< definitions.IntTpe => IntegerType case t if t <:< definitions.LongTpe => LongType case t if t <:< definitions.DoubleTpe => DoubleType @@ -712,6 +713,9 @@ object ScalaReflection extends ScalaReflection { /** Returns a catalyst DataType and its nullability for the given Scala Type using reflection. 
*/ def schemaFor(tpe: `Type`): Schema = cleanUpReflectionObjects { tpe.dealias match { + // This must be the first case: the Null type is a subtype of every other type in Scala, so + // it would otherwise wrongly match an earlier case (Option, as of now). + case t if t <:< definitions.NullTpe => Schema(NullType, nullable = true) case t if t.typeSymbol.annotations.exists(_.tree.tpe =:= typeOf[SQLUserDefinedType]) => val udt = getClassFromType(t).getAnnotation(classOf[SQLUserDefinedType]).udt().newInstance() Schema(udt, nullable = true) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlreadyExistException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlreadyExistException.scala index 57f7a80bedc6c..6d587abd8fd4d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlreadyExistException.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlreadyExistException.scala @@ -31,7 +31,7 @@ class TableAlreadyExistsException(db: String, table: String) extends AnalysisException(s"Table or view '$table' already exists in database '$db'") class TempTableAlreadyExistsException(table: String) - extends AnalysisException(s"Temporary table '$table' already exists") + extends AnalysisException(s"Temporary view '$table' already exists") class PartitionAlreadyExistsException(db: String, table: String, spec: TablePartitionSpec) extends AnalysisException( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index e5c93b5f0e059..35b35110e491f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.analysis -import scala.annotation.tailrec import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.AnalysisException @@ -53,6 +52,7 @@ object SimpleAnalyzer extends Analyzer( /** * Provides a way to keep state during the analysis, this enables us to decouple the concerns * of analysis environment from the catalog. + * The state that is kept here is per-query. * * Note this is thread local.
* @@ -71,6 +71,8 @@ object AnalysisContext { } def get: AnalysisContext = value.get() + def reset(): Unit = value.remove() + private def set(context: AnalysisContext): Unit = value.set(context) def withAnalysisContext[A](database: Option[String])(f: => A): A = { @@ -96,6 +98,17 @@ class Analyzer( this(catalog, conf, conf.optimizerMaxIterations) } + override def execute(plan: LogicalPlan): LogicalPlan = { + AnalysisContext.reset() + try { + executeSameContext(plan) + } finally { + AnalysisContext.reset() + } + } + + private def executeSameContext(plan: LogicalPlan): LogicalPlan = super.execute(plan) + def resolver: Resolver = conf.resolver protected val fixedPoint = FixedPoint(maxIterations) @@ -151,7 +164,7 @@ class Analyzer( TimeWindowing :: ResolveInlineTables(conf) :: ResolveTimeZone(conf) :: - TypeCoercion.typeCoercionRules ++ + TypeCoercion.typeCoercionRules(conf) ++ extendedResolutionRules : _*), Batch("Post-Hoc Resolution", Once, postHocResolutionRules: _*), Batch("View", Once, @@ -165,18 +178,19 @@ class Analyzer( Batch("Subquery", Once, UpdateOuterReferences), Batch("Cleanup", fixedPoint, - CleanupAliases) + CleanupAliases, + EliminateBarriers) ) /** * Analyze cte definitions and substitute child plan with analyzed cte definitions. */ object CTESubstitution extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { case With(child, relations) => substituteCTE(child, relations.foldLeft(Seq.empty[(String, LogicalPlan)]) { case (resolved, (name, relation)) => - resolved :+ name -> execute(substituteCTE(relation, resolved)) + resolved :+ name -> executeSameContext(substituteCTE(relation, resolved)) }) case other => other } @@ -200,7 +214,7 @@ class Analyzer( * Substitute child plan with WindowSpecDefinitions. */ object WindowsSubstitution extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { // Lookup WindowSpecDefinitions. This rule works with unresolved children. 
case WithWindowDefinition(windowDefinitions, child) => child.transform { @@ -221,28 +235,26 @@ class Analyzer( */ object ResolveAliases extends Rule[LogicalPlan] { private def assignAliases(exprs: Seq[NamedExpression]) = { - exprs.zipWithIndex.map { - case (expr, i) => - expr.transformUp { case u @ UnresolvedAlias(child, optGenAliasFunc) => - child match { - case ne: NamedExpression => ne - case go @ GeneratorOuter(g: Generator) if g.resolved => MultiAlias(go, Nil) - case e if !e.resolved => u - case g: Generator => MultiAlias(g, Nil) - case c @ Cast(ne: NamedExpression, _, _) => Alias(c, ne.name)() - case e: ExtractValue => Alias(e, toPrettySQL(e))() - case e if optGenAliasFunc.isDefined => - Alias(child, optGenAliasFunc.get.apply(e))() - case e => Alias(e, toPrettySQL(e))() - } + exprs.map(_.transformUp { case u @ UnresolvedAlias(child, optGenAliasFunc) => + child match { + case ne: NamedExpression => ne + case go @ GeneratorOuter(g: Generator) if g.resolved => MultiAlias(go, Nil) + case e if !e.resolved => u + case g: Generator => MultiAlias(g, Nil) + case c @ Cast(ne: NamedExpression, _, _) => Alias(c, ne.name)() + case e: ExtractValue => Alias(e, toPrettySQL(e))() + case e if optGenAliasFunc.isDefined => + Alias(child, optGenAliasFunc.get.apply(e))() + case e => Alias(e, toPrettySQL(e))() } - }.asInstanceOf[Seq[NamedExpression]] + } + ).asInstanceOf[Seq[NamedExpression]] } private def hasUnresolvedAlias(exprs: Seq[NamedExpression]) = exprs.exists(_.find(_.isInstanceOf[UnresolvedAlias]).isDefined) - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { case Aggregate(groups, aggs, child) if child.resolved && hasUnresolvedAlias(aggs) => Aggregate(groups, assignAliases(aggs), child) @@ -602,7 +614,7 @@ class Analyzer( "avoid errors. Increase the value of spark.sql.view.maxNestedViewDepth to work " + "aroud this.") } - execute(child) + executeSameContext(child) } view.copy(child = newChild) case p @ SubqueryAlias(_, view: View) => @@ -611,7 +623,7 @@ class Analyzer( case _ => plan } - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { case i @ InsertIntoTable(u: UnresolvedRelation, parts, child, _, _) if child.resolved => EliminateSubqueryAliases(lookupTableFromCatalog(u)) match { case v: View => @@ -672,6 +684,12 @@ class Analyzer( s"between $left and $right") right.collect { + // For `AnalysisBarrier`, recursively de-duplicate its child. + case oldVersion: AnalysisBarrier + if oldVersion.outputSet.intersect(conflictingAttributes).nonEmpty => + val newVersion = dedupRight(left, oldVersion.child) + (oldVersion, AnalysisBarrier(newVersion)) + // Handle base relations that might appear more than once. 
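// ---------------------------------------------------------------------------
// Editor's sketch (not part of the patch): dedupRight handles self-joins,
// where both sides carry the same attribute ids; it rewrites the right side
// with fresh exprIds. The new case above unwraps an AnalysisBarrier before
// deduplicating. A hypothetical trigger:
//   val df = spark.range(5).toDF("id")
//   df.join(df, df("id") === df("id"))   // right-side `id` gets a new exprId
// The dedup cases for base relations continue in the hunk below.
// ---------------------------------------------------------------------------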
case oldVersion: MultiInstanceRelation if oldVersion.outputSet.intersect(conflictingAttributes).nonEmpty => @@ -692,7 +710,7 @@ class Analyzer( (oldVersion, oldVersion.copy(aggregateExpressions = newAliases(aggregateExpressions))) case oldVersion: Generate - if oldVersion.generatedSet.intersect(conflictingAttributes).nonEmpty => + if oldVersion.producedAttributes.intersect(conflictingAttributes).nonEmpty => val newOutput = oldVersion.generatorOutput.map(_.newInstance()) (oldVersion, oldVersion.copy(generatorOutput = newOutput)) @@ -712,7 +730,7 @@ class Analyzer( right case Some((oldRelation, newRelation)) => val attributeRewrites = AttributeMap(oldRelation.output.zip(newRelation.output)) - val newRight = right transformUp { + right transformUp { case r if r == oldRelation => newRelation } transformUp { case other => other transformExpressions { @@ -722,7 +740,6 @@ class Analyzer( s.withNewPlan(dedupOuterReferencesInSubquery(s.plan, attributeRewrites)) } } - newRight } } @@ -799,7 +816,7 @@ class Analyzer( case _ => e.mapChildren(resolve(_, q)) } - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { case p: LogicalPlan if !p.childrenResolved => p // If the projection list contains Stars, expand it. @@ -957,7 +974,8 @@ class Analyzer( protected[sql] def resolveExpression( expr: Expression, plan: LogicalPlan, - throws: Boolean = false) = { + throws: Boolean = false): Expression = { + if (expr.resolved) return expr // Resolve expression in one round. // If throws == false or the desired attribute doesn't exist // (like try to resolve `a.b` but `a` doesn't exist), fail and return the origin one. @@ -993,7 +1011,7 @@ class Analyzer( * have no effect on the results. */ object ResolveOrdinalInOrderByAndGroupBy extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { case p if !p.childrenResolved => p // Replace the index with the related attribute for ORDER BY, // which is a 1-base position of the projection list. @@ -1049,7 +1067,7 @@ class Analyzer( }} } - override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + override def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { case agg @ Aggregate(groups, aggs, child) if conf.groupByAliases && child.resolved && aggs.forall(_.resolved) && groups.exists(!_.resolved) => @@ -1073,102 +1091,79 @@ class Analyzer( * The HAVING clause could also used a grouping columns that is not presented in the SELECT. */ object ResolveMissingReferences extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { // Skip sort with aggregate. This will be handled in ResolveAggregateFunctions + case sa @ Sort(_, _, AnalysisBarrier(child: Aggregate)) => sa case sa @ Sort(_, _, child: Aggregate) => sa case s @ Sort(order, _, child) if !s.resolved && child.resolved => - try { - val newOrder = order.map(resolveExpressionRecursively(_, child).asInstanceOf[SortOrder]) - val requiredAttrs = AttributeSet(newOrder).filter(_.resolved) - val missingAttrs = requiredAttrs -- child.outputSet - if (missingAttrs.nonEmpty) { - // Add missing attributes and then project them away after the sort. 
- Project(child.output, - Sort(newOrder, s.global, addMissingAttr(child, missingAttrs))) - } else if (newOrder != order) { - s.copy(order = newOrder) - } else { - s - } - } catch { - // Attempting to resolve it might fail. When this happens, return the original plan. - // Users will see an AnalysisException for resolution failure of missing attributes - // in Sort - case ae: AnalysisException => s + val (newOrder, newChild) = resolveExprsAndAddMissingAttrs(order, child) + val ordering = newOrder.map(_.asInstanceOf[SortOrder]) + if (child.output == newChild.output) { + s.copy(order = ordering) + } else { + // Add missing attributes and then project them away. + val newSort = s.copy(order = ordering, child = newChild) + Project(child.output, newSort) } case f @ Filter(cond, child) if !f.resolved && child.resolved => - try { - val newCond = resolveExpressionRecursively(cond, child) - val requiredAttrs = newCond.references.filter(_.resolved) - val missingAttrs = requiredAttrs -- child.outputSet - if (missingAttrs.nonEmpty) { - // Add missing attributes and then project them away. - Project(child.output, - Filter(newCond, addMissingAttr(child, missingAttrs))) - } else if (newCond != cond) { - f.copy(condition = newCond) - } else { - f - } - } catch { - // Attempting to resolve it might fail. When this happens, return the original plan. - // Users will see an AnalysisException for resolution failure of missing attributes - case ae: AnalysisException => f + val (newCond, newChild) = resolveExprsAndAddMissingAttrs(Seq(cond), child) + if (child.output == newChild.output) { + f.copy(condition = newCond.head) + } else { + // Add missing attributes and then project them away. + val newFilter = Filter(newCond.head, newChild) + Project(child.output, newFilter) } } - /** - * Add the missing attributes into projectList of Project/Window or aggregateExpressions of - * Aggregate. - */ - private def addMissingAttr(plan: LogicalPlan, missingAttrs: AttributeSet): LogicalPlan = { - if (missingAttrs.isEmpty) { - return plan - } - plan match { - case p: Project => - val missing = missingAttrs -- p.child.outputSet - Project(p.projectList ++ missingAttrs, addMissingAttr(p.child, missing)) - case a: Aggregate => - // all the missing attributes should be grouping expressions - // TODO: push down AggregateExpression - missingAttrs.foreach { attr => - if (!a.groupingExpressions.exists(_.semanticEquals(attr))) { - throw new AnalysisException(s"Can't add $attr to ${a.simpleString}") - } - } - val newAggregateExpressions = a.aggregateExpressions ++ missingAttrs - a.copy(aggregateExpressions = newAggregateExpressions) - case g: Generate => - // If join is false, we will convert it to true for getting from the child the missing - // attributes that its child might have or could have. - val missing = missingAttrs -- g.child.outputSet - g.copy(join = true, child = addMissingAttr(g.child, missing)) - case d: Distinct => - throw new AnalysisException(s"Can't add $missingAttrs to $d") - case u: UnaryNode => - u.withNewChildren(addMissingAttr(u.child, missingAttrs) :: Nil) - case other => - throw new AnalysisException(s"Can't add $missingAttrs to $other") - } - } - - /** - * Resolve the expression on a specified logical plan and it's child (recursively), until - * the expression is resolved or meet a non-unary node or Subquery. 
- */ - @tailrec - private def resolveExpressionRecursively(expr: Expression, plan: LogicalPlan): Expression = { - val resolved = resolveExpression(expr, plan) - if (resolved.resolved) { - resolved + private def resolveExprsAndAddMissingAttrs( + exprs: Seq[Expression], plan: LogicalPlan): (Seq[Expression], LogicalPlan) = { + if (exprs.forall(_.resolved)) { + // All given expressions are resolved, no need to continue anymore. + (exprs, plan) } else { plan match { - case u: UnaryNode if !u.isInstanceOf[SubqueryAlias] => - resolveExpressionRecursively(resolved, u.child) - case other => resolved + // For `AnalysisBarrier`, recursively resolve expressions and add missing attributes via + // its child. + case barrier: AnalysisBarrier => + val (newExprs, newChild) = resolveExprsAndAddMissingAttrs(exprs, barrier.child) + (newExprs, AnalysisBarrier(newChild)) + + case p: Project => + val maybeResolvedExprs = exprs.map(resolveExpression(_, p)) + val (newExprs, newChild) = resolveExprsAndAddMissingAttrs(maybeResolvedExprs, p.child) + val missingAttrs = AttributeSet(newExprs) -- AttributeSet(maybeResolvedExprs) + (newExprs, Project(p.projectList ++ missingAttrs, newChild)) + + case a @ Aggregate(groupExprs, aggExprs, child) => + val maybeResolvedExprs = exprs.map(resolveExpression(_, a)) + val (newExprs, newChild) = resolveExprsAndAddMissingAttrs(maybeResolvedExprs, child) + val missingAttrs = AttributeSet(newExprs) -- AttributeSet(maybeResolvedExprs) + if (missingAttrs.forall(attr => groupExprs.exists(_.semanticEquals(attr)))) { + // All the missing attributes are grouping expressions, valid case. + (newExprs, a.copy(aggregateExpressions = aggExprs ++ missingAttrs, child = newChild)) + } else { + // Need to add non-grouping attributes, invalid case. + (exprs, a) + } + + case g: Generate => + val maybeResolvedExprs = exprs.map(resolveExpression(_, g)) + val (newExprs, newChild) = resolveExprsAndAddMissingAttrs(maybeResolvedExprs, g.child) + (newExprs, g.copy(unrequiredChildIndex = Nil, child = newChild)) + + // For `Distinct` and `SubqueryAlias`, we can't recursively resolve and add attributes + // via its children. + case u: UnaryNode if !u.isInstanceOf[Distinct] && !u.isInstanceOf[SubqueryAlias] => + val maybeResolvedExprs = exprs.map(resolveExpression(_, u)) + val (newExprs, newChild) = resolveExprsAndAddMissingAttrs(maybeResolvedExprs, u.child) + (newExprs, u.withNewChildren(Seq(newChild))) + + // For other operators, we can't recursively resolve and add attributes via its children. + case other => + (exprs.map(resolveExpression(_, other)), other) } } } @@ -1197,7 +1192,7 @@ class Analyzer( * Replaces [[UnresolvedFunction]]s with concrete [[Expression]]s. */ object ResolveFunctions extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { case q: LogicalPlan => q transformExpressions { case u if !u.childrenResolved => u // Skip until children are resolved. @@ -1288,7 +1283,7 @@ class Analyzer( do { // Try to resolve the subquery plan using the regular analyzer. previous = current - current = execute(current) + current = executeSameContext(current) // Use the outer references to resolve the subquery plan if it isn't resolved yet. val i = plans.iterator @@ -1334,7 +1329,7 @@ class Analyzer( /** * Resolve and rewrite all subqueries in an operator tree.. 
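// ---------------------------------------------------------------------------
// Editor's sketch (not part of the patch): resolveExprsAndAddMissingAttrs
// folds the old addMissingAttr/resolveExpressionRecursively pair into one
// recursive pass that resolves the given expressions and threads any missing
// attributes through Project/Aggregate/Generate. The classic query shape it
// serves (illustrative):
//   SELECT a FROM t ORDER BY b
// which analyzes to Project(a, Sort(b, Project([a, b], t))): `b` is added to
// the child projection so the Sort resolves, then projected away on top.
// ---------------------------------------------------------------------------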
*/ - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { // In case of HAVING (a filter after an aggregate) we use both the aggregate and // its child for resolution. case f @ Filter(_, a: Aggregate) if f.childrenResolved => @@ -1350,7 +1345,7 @@ class Analyzer( */ object ResolveSubqueryColumnAliases extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { case u @ UnresolvedSubqueryColumnAliases(columnNames, child) if child.resolved => // Resolves output attributes if a query has alias names in its subquery: // e.g., SELECT * FROM (SELECT 1 AS a, 1 AS b) t(col1, col2) @@ -1373,7 +1368,7 @@ class Analyzer( * Turns projections that contain aggregate expressions into aggregations. */ object GlobalAggregates extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { case Project(projectList, child) if containsAggregates(projectList) => Aggregate(Nil, projectList, child) } @@ -1399,19 +1394,19 @@ class Analyzer( * underlying aggregate operator and then projected away after the original operator. */ object ResolveAggregateFunctions extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { - case filter @ Filter(havingCondition, - aggregate @ Aggregate(grouping, originalAggExprs, child)) - if aggregate.resolved => + def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { + case Filter(cond, AnalysisBarrier(agg: Aggregate)) => + apply(Filter(cond, agg)).mapChildren(AnalysisBarrier) + case f @ Filter(cond, agg @ Aggregate(grouping, originalAggExprs, child)) if agg.resolved => // Try resolving the condition of the filter as though it is in the aggregate clause try { val aggregatedCondition = Aggregate( grouping, - Alias(havingCondition, "havingCondition")() :: Nil, + Alias(cond, "havingCondition")() :: Nil, child) - val resolvedOperator = execute(aggregatedCondition) + val resolvedOperator = executeSameContext(aggregatedCondition) def resolvedAggregateFilter = resolvedOperator .asInstanceOf[Aggregate] @@ -1430,7 +1425,7 @@ class Analyzer( // Grouping functions are handled in the rule [[ResolveGroupingAnalytics]]. case e: Expression if grouping.exists(_.semanticEquals(e)) && !ResolveGroupingAnalytics.hasGroupingFunction(e) && - !aggregate.output.exists(_.semanticEquals(e)) => + !agg.output.exists(_.semanticEquals(e)) => e match { case ne: NamedExpression => aggregateExpressions += ne @@ -1444,21 +1439,23 @@ class Analyzer( // Push the aggregate expressions into the aggregate (if any). if (aggregateExpressions.nonEmpty) { - Project(aggregate.output, + Project(agg.output, Filter(transformedAggregateFilter, - aggregate.copy(aggregateExpressions = originalAggExprs ++ aggregateExpressions))) + agg.copy(aggregateExpressions = originalAggExprs ++ aggregateExpressions))) } else { - filter + f } } else { - filter + f } } catch { // Attempting to resolve in the aggregate can result in ambiguity. When this happens, // just return the original plan. 
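// ---------------------------------------------------------------------------
// Editor's sketch (not part of the patch): ResolveAggregateFunctions resolves
// a HAVING predicate as though it were an extra aggregate expression, pushes
// it into the Aggregate, and projects it away again, e.g. (illustrative):
//   SELECT k, max(v) FROM t GROUP BY k HAVING max(v) > 1
// The new AnalysisBarrier cases above simply unwrap the barrier, reuse the
// same logic, and re-wrap the children afterwards.
// ---------------------------------------------------------------------------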
- case ae: AnalysisException => filter + case ae: AnalysisException => f } + case Sort(sortOrder, global, AnalysisBarrier(aggregate: Aggregate)) => + apply(Sort(sortOrder, global, aggregate)).mapChildren(AnalysisBarrier) case sort @ Sort(sortOrder, global, aggregate: Aggregate) if aggregate.resolved => // Try resolving the ordering as though it is in the aggregate clause. @@ -1467,7 +1464,8 @@ class Analyzer( val aliasedOrdering = unresolvedSortOrders.map(o => Alias(o.child, "aggOrder")()) val aggregatedOrdering = aggregate.copy(aggregateExpressions = aliasedOrdering) - val resolvedAggregate: Aggregate = execute(aggregatedOrdering).asInstanceOf[Aggregate] + val resolvedAggregate: Aggregate = + executeSameContext(aggregatedOrdering).asInstanceOf[Aggregate] val resolvedAliasedOrdering: Seq[Alias] = resolvedAggregate.aggregateExpressions.asInstanceOf[Seq[Alias]] @@ -1571,7 +1569,7 @@ class Analyzer( } } - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { case Project(projectList, _) if projectList.exists(hasNestedGenerator) => val nestedGenerator = projectList.find(hasNestedGenerator).get throw new AnalysisException("Generators are not supported when it's nested in " + @@ -1595,7 +1593,7 @@ class Analyzer( resolvedGenerator = Generate( generator, - join = projectList.size > 1, // Only join if there are other expressions in SELECT. + unrequiredChildIndex = Nil, outer = outer, qualifier = None, generatorOutput = ResolveGenerate.makeGeneratorOutput(generator, names), @@ -1629,7 +1627,7 @@ class Analyzer( * that wrap the [[Generator]]. */ object ResolveGenerate extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { case g: Generate if !g.child.resolved || !g.generator.resolved => g case g: Generate if !g.resolved => g.copy(generatorOutput = makeGeneratorOutput(g.generator, g.generatorOutput.map(_.name))) @@ -1946,7 +1944,7 @@ class Analyzer( * put them into an inner Project and finally project them away at the outer Project. */ object PullOutNondeterministic extends Rule[LogicalPlan] { - override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + override def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { case p if !p.resolved => p // Skip unresolved nodes. case p: Project => p case f: Filter => f @@ -1991,7 +1989,7 @@ class Analyzer( * and we should return null if the input is null. */ object HandleNullInputsForUDF extends Rule[LogicalPlan] { - override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + override def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { case p if !p.resolved => p // Skip unresolved nodes. case p => p transformExpressionsUp { @@ -2056,7 +2054,7 @@ class Analyzer( * Then apply a Project on a normal Join to eliminate natural or using join. */ object ResolveNaturalAndUsingJoin extends Rule[LogicalPlan] { - override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + override def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { case j @ Join(left, right, UsingJoin(joinType, usingCols), condition) if left.resolved && right.resolved && j.duplicateResolved => commonNaturalJoinProcessing(left, right, joinType, usingCols, None) @@ -2121,7 +2119,7 @@ class Analyzer( * to the given input attributes. 
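// ---------------------------------------------------------------------------
// Editor's sketch (not part of the patch): HandleNullInputsForUDF (now run
// with transformUp) protects Scala UDFs whose parameters are primitives,
// since a primitive argument cannot receive null. Illustrative case:
//   val inc = udf((x: Int) => x + 1)
//   df.select(inc($"maybeNull"))   // guarded so a null input yields null
// ---------------------------------------------------------------------------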
*/ object ResolveDeserializer extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { case p if !p.childrenResolved => p case p if p.resolved => p @@ -2207,7 +2205,7 @@ class Analyzer( * constructed is an inner class. */ object ResolveNewInstance extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { case p if !p.childrenResolved => p case p if p.resolved => p @@ -2241,7 +2239,7 @@ class Analyzer( "type of the field in the target object") } - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { case p if !p.childrenResolved => p case p if p.resolved => p @@ -2300,7 +2298,7 @@ object CleanupAliases extends Rule[LogicalPlan] { case other => trimAliases(other) } - override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + override def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { case Project(projectList, child) => val cleanedProjectList = projectList.map(trimNonTopLevelAliases(_).asInstanceOf[NamedExpression]) @@ -2329,6 +2327,13 @@ object CleanupAliases extends Rule[LogicalPlan] { } } +/** Remove the barrier nodes of analysis */ +object EliminateBarriers extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { + case AnalysisBarrier(child) => child + } +} + /** * Ignore event time watermark in batch query, which is only supported in Structured Streaming. * TODO: add this rule into analyzer rule list. @@ -2379,7 +2384,7 @@ object TimeWindowing extends Rule[LogicalPlan] { * @return the logical plan that will generate the time windows using the Expand operator, with * the Filter operator for correctness and Project for usability. */ - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + def apply(plan: LogicalPlan): LogicalPlan = plan.transformUp { case p: LogicalPlan if p.children.size == 1 => val child = p.children.head val windowExpressions = diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index b5e8bdd79869e..bbcec5627bd49 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -78,8 +78,6 @@ trait CheckAnalysis extends PredicateHelper { // We transform up and order the rules so as to catch the first possible failure instead // of the result of cascading resolution failures. plan.foreachUp { - case p if p.analyzed => // Skip already analyzed sub-plans - case u: UnresolvedRelation => u.failAnalysis(s"Table or view not found: ${u.tableIdentifier}") @@ -353,8 +351,6 @@ trait CheckAnalysis extends PredicateHelper { case o if !o.resolved => failAnalysis(s"unresolved operator ${o.simpleString}") case _ => } - - plan.foreach(_.setAnalyzed()) } /** @@ -612,8 +608,8 @@ trait CheckAnalysis extends PredicateHelper { // allows to have correlation under it // but must not host any outer references. // Note: - // Generator with join=false is treated as Category 4. - case g: Generate if g.join => + // Generator with requiredChildOutput.isEmpty is treated as Category 4. 
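// ---------------------------------------------------------------------------
// Editor's note (not part of the patch): across these hunks, Generate's
// boolean `join` flag is replaced by `unrequiredChildIndex`. Rather than
// stating "emit child rows alongside generator output: yes/no", the node now
// lists which child outputs are NOT needed, and requiredChildOutput.nonEmpty
// takes over the role the old join=true played. An illustrative plan that
// keeps the child output:
//   spark.range(3).selectExpr("id", "explode(array(id, -id))")
// ---------------------------------------------------------------------------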
+ case g: Generate if g.requiredChildOutput.nonEmpty => failOnInvalidOuterReference(g) // Category 4: Any other operators not in the above 3 categories diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala index fd2ac78b25dbd..a8100b9b24aac 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala @@ -58,7 +58,7 @@ import org.apache.spark.sql.types._ * - FLOAT and DOUBLE cause fixed-length decimals to turn into DOUBLE */ // scalastyle:on -object DecimalPrecision extends Rule[LogicalPlan] { +object DecimalPrecision extends TypeCoercionRule { import scala.math.{max, min} private def isFloat(t: DataType): Boolean = t == FloatType || t == DoubleType @@ -78,7 +78,7 @@ object DecimalPrecision extends Rule[LogicalPlan] { PromotePrecision(Cast(e, dataType)) } - def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { + override protected def coerceTypes(plan: LogicalPlan): LogicalPlan = plan transformUp { // fix decimal precision for expressions case q => q.transformExpressionsUp( decimalAndDecimal.orElse(integralAndDecimalLiteral).orElse(nondecimalAndDecimal)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 11538bd31b4fd..5ddb39822617d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -392,6 +392,7 @@ object FunctionRegistry { expression[ToUnixTimestamp]("to_unix_timestamp"), expression[ToUTCTimestamp]("to_utc_timestamp"), expression[TruncDate]("trunc"), + expression[TruncTimestamp]("date_trunc"), expression[UnixTimestamp]("unix_timestamp"), expression[DayOfWeek]("dayofweek"), expression[WeekOfYear]("weekofyear"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala index 7358f9ee36921..a214e59302cd9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala @@ -103,7 +103,7 @@ object ResolveTableValuedFunctions extends Rule[LogicalPlan] { }) ) - override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { + override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case u: UnresolvedTableValuedFunction if u.functionArgs.forall(_.resolved) => val resolvedFunc = builtinFunctions.get(u.functionName.toLowerCase(Locale.ROOT)) match { case Some(tvf) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala index 072dc954879ca..7a0aa08289efa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis import 
scala.util.control.NonFatal import org.apache.spark.internal.Logging -import org.apache.spark.sql.catalyst.expressions.{Add, AttributeReference, AttributeSet, Cast, CheckOverflow, Expression, ExpressionSet, GreaterThan, GreaterThanOrEqual, LessThan, LessThanOrEqual, Literal, Multiply, PreciseTimestampConversion, PredicateHelper, Subtract, TimeAdd, TimeSub, UnaryMinus} +import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys import org.apache.spark.sql.catalyst.plans.logical.{EventTimeWatermark, LogicalPlan} import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark._ @@ -238,6 +238,8 @@ object StreamingJoinHelper extends PredicateHelper with Logging { collect(child, !negate) case CheckOverflow(child, _) => collect(child, negate) + case PromotePrecision(child) => + collect(child, negate) case Cast(child, dataType, _) => dataType match { case _: NumericType | _: TimestampType => collect(child, negate) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala index 860d20f897690..f9fd0df9e4010 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala @@ -33,7 +33,7 @@ class SubstituteUnresolvedOrdinals(conf: SQLConf) extends Rule[LogicalPlan] { case _ => false } - def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { + def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case s: Sort if conf.orderByOrdinal && s.order.exists(o => isIntLiteral(o.child)) => val newOrders = s.order.map { case order @ SortOrder(ordinal @ Literal(index: Int, IntegerType), _, _, _) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 28be955e08a0d..e8669c4637d06 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -22,10 +22,12 @@ import javax.annotation.Nullable import scala.annotation.tailrec import scala.collection.mutable +import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -44,19 +46,19 @@ import org.apache.spark.sql.types._ */ object TypeCoercion { - val typeCoercionRules = - PropagateTypes :: - InConversion :: + def typeCoercionRules(conf: SQLConf): List[Rule[LogicalPlan]] = + InConversion :: WidenSetOperationTypes :: PromoteStrings :: DecimalPrecision :: BooleanEquality :: FunctionArgumentConversion :: + ConcatCoercion(conf) :: + EltCoercion(conf) :: CaseWhenCoercion :: IfCoercion :: StackCoercion :: Division :: - PropagateTypes :: ImplicitTypeCasts :: DateTimeOperations :: WindowFrameCoercion :: @@ -220,38 +222,6 @@ object TypeCoercion { private def haveSameType(exprs: Seq[Expression]): Boolean = exprs.map(_.dataType).distinct.length == 1 - /** - * Applies any changes to [[AttributeReference]] data types that are made by other rules 
to - * instances higher in the query tree. - */ - object PropagateTypes extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - - // No propagation required for leaf nodes. - case q: LogicalPlan if q.children.isEmpty => q - - // Don't propagate types from unresolved children. - case q: LogicalPlan if !q.childrenResolved => q - - case q: LogicalPlan => - val inputMap = q.inputSet.toSeq.map(a => (a.exprId, a)).toMap - q transformExpressions { - case a: AttributeReference => - inputMap.get(a.exprId) match { - // This can happen when an Attribute reference is born in a non-leaf node, for - // example due to a call to an external script like in the Transform operator. - // TODO: Perhaps those should actually be aliases? - case None => a - // Leave the same if the dataTypes match. - case Some(newType) if a.dataType == newType.dataType => a - case Some(newType) => - logDebug(s"Promoting $a to $newType in ${q.simpleString}") - newType - } - } - } - } - /** * Widens numeric types and converts strings to numbers when appropriate. * @@ -280,9 +250,7 @@ object TypeCoercion { */ object WidenSetOperationTypes extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case p if p.analyzed => p - + def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case s @ SetOperation(left, right) if s.childrenResolved && left.output.length == right.output.length && !s.resolved => val newChildren: Seq[LogicalPlan] = buildNewChildrenWithWiderTypes(left :: right :: Nil) @@ -345,7 +313,7 @@ object TypeCoercion { /** * Promotes strings that appear in arithmetic expressions. */ - object PromoteStrings extends Rule[LogicalPlan] { + object PromoteStrings extends TypeCoercionRule { private def castExpr(expr: Expression, targetType: DataType): Expression = { (expr.dataType, targetType) match { case (NullType, dt) => Literal.create(null, targetType) @@ -354,13 +322,16 @@ object TypeCoercion { } } - def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { + override protected def coerceTypes( + plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { // Skip nodes who's children have not been resolved yet. case e if !e.childrenResolved => e - case a @ BinaryArithmetic(left @ StringType(), right) => + case a @ BinaryArithmetic(left @ StringType(), right) + if right.dataType != CalendarIntervalType => a.makeCopy(Array(Cast(left, DoubleType), right)) - case a @ BinaryArithmetic(left, right @ StringType()) => + case a @ BinaryArithmetic(left, right @ StringType()) + if left.dataType != CalendarIntervalType => a.makeCopy(Array(left, Cast(right, DoubleType))) // For equality between string and timestamp we cast the string to a timestamp @@ -403,7 +374,7 @@ object TypeCoercion { * operator type is found the original expression will be returned and an * Analysis Exception will be raised at the type checking phase. */ - object InConversion extends Rule[LogicalPlan] { + object InConversion extends TypeCoercionRule { private def flattenExpr(expr: Expression): Seq[Expression] = { expr match { // Multi columns in IN clause is represented as a CreateNamedStruct. @@ -413,7 +384,8 @@ object TypeCoercion { } } - def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { + override protected def coerceTypes( + plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { // Skip nodes who's children have not been resolved yet. 
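// ---------------------------------------------------------------------------
// Editor's sketch (not part of the patch): the CalendarIntervalType guards
// added to PromoteStrings above stop a string operand from being cast to
// DoubleType when the other side of the arithmetic is an interval, leaving
// such expressions to the date/time coercion rules. Query believed to
// benefit (illustrative):
//   SELECT '2017-12-24' + INTERVAL 2 days
// ---------------------------------------------------------------------------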
case e if !e.childrenResolved => e @@ -472,7 +444,7 @@ object TypeCoercion { private val trueValues = Seq(1.toByte, 1.toShort, 1, 1L, Decimal.ONE) private val falseValues = Seq(0.toByte, 0.toShort, 0, 0L, Decimal.ZERO) - def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { + def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { // Skip nodes who's children have not been resolved yet. case e if !e.childrenResolved => e @@ -512,8 +484,9 @@ object TypeCoercion { /** * This ensure that the types for various functions are as expected. */ - object FunctionArgumentConversion extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { + object FunctionArgumentConversion extends TypeCoercionRule { + override protected def coerceTypes( + plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { // Skip nodes who's children have not been resolved yet. case e if !e.childrenResolved => e @@ -602,8 +575,9 @@ object TypeCoercion { * Hive only performs integral division with the DIV operator. The arguments to / are always * converted to fractional types. */ - object Division extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { + object Division extends TypeCoercionRule { + override protected def coerceTypes( + plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { // Skip nodes who has not been resolved yet, // as this is an extra rule which should be applied at last. case e if !e.childrenResolved => e @@ -624,8 +598,9 @@ object TypeCoercion { /** * Coerces the type of different branches of a CASE WHEN statement to a common type. */ - object CaseWhenCoercion extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { + object CaseWhenCoercion extends TypeCoercionRule { + override protected def coerceTypes( + plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { case c: CaseWhen if c.childrenResolved && !c.valueTypesEqual => val maybeCommonType = findWiderCommonType(c.valueTypes) maybeCommonType.map { commonType => @@ -654,8 +629,9 @@ object TypeCoercion { /** * Coerces the type of different branches of If statement to a common type. */ - object IfCoercion extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { + object IfCoercion extends TypeCoercionRule { + override protected def coerceTypes( + plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { case e if !e.childrenResolved => e // Find tightest common type for If, if the true value and false value have different types. case i @ If(pred, left, right) if left.dataType != right.dataType => @@ -674,8 +650,8 @@ object TypeCoercion { /** * Coerces NullTypes in the Stack expression to the column types of the corresponding positions. */ - object StackCoercion extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { + object StackCoercion extends TypeCoercionRule { + override def coerceTypes(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { case s @ Stack(children) if s.childrenResolved && s.hasFoldableNumRows => Stack(children.zipWithIndex.map { // The first child is the number of rows for stack. @@ -687,6 +663,56 @@ object TypeCoercion { } } + /** + * Coerces the types of [[Concat]] children to expected ones. + * + * If `spark.sql.function.concatBinaryAsString` is false and all children types are binary, + * the expected types are binary. 
Otherwise, the expected ones are strings. + */ + case class ConcatCoercion(conf: SQLConf) extends TypeCoercionRule { + + override protected def coerceTypes(plan: LogicalPlan): LogicalPlan = plan transform { case p => + p transformExpressionsUp { + // Skip nodes if unresolved or empty children + case c @ Concat(children) if !c.childrenResolved || children.isEmpty => c + case c @ Concat(children) if conf.concatBinaryAsString || + !children.map(_.dataType).forall(_ == BinaryType) => + val newChildren = c.children.map { e => + ImplicitTypeCasts.implicitCast(e, StringType).getOrElse(e) + } + c.copy(children = newChildren) + } + } + } + + /** + * Coerces the types of [[Elt]] children to expected ones. + * + * If `spark.sql.function.eltOutputAsString` is false and all children types are binary, + * the expected types are binary. Otherwise, the expected ones are strings. + */ + case class EltCoercion(conf: SQLConf) extends TypeCoercionRule { + + override protected def coerceTypes(plan: LogicalPlan): LogicalPlan = plan transform { case p => + p transformExpressionsUp { + // Skip nodes if unresolved or not enough children + case c @ Elt(children) if !c.childrenResolved || children.size < 2 => c + case c @ Elt(children) => + val index = children.head + val newIndex = ImplicitTypeCasts.implicitCast(index, IntegerType).getOrElse(index) + val newInputs = if (conf.eltOutputAsString || + !children.tail.map(_.dataType).forall(_ == BinaryType)) { + children.tail.map { e => + ImplicitTypeCasts.implicitCast(e, StringType).getOrElse(e) + } + } else { + children.tail + } + c.copy(children = newIndex +: newInputs) + } + } + } + /** * Turns Add/Subtract of DateType/TimestampType/StringType and CalendarIntervalType * to TimeAdd/TimeSub @@ -695,7 +721,7 @@ object TypeCoercion { private val acceptedTypes = Seq(DateType, TimestampType, StringType) - def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { + def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { // Skip nodes who's children have not been resolved yet. case e if !e.childrenResolved => e @@ -711,8 +737,9 @@ object TypeCoercion { /** * Casts types according to the expected input types for [[Expression]]s. */ - object ImplicitTypeCasts extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { + object ImplicitTypeCasts extends TypeCoercionRule { + override protected def coerceTypes( + plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { // Skip nodes who's children have not been resolved yet. case e if !e.childrenResolved => e @@ -828,8 +855,9 @@ object TypeCoercion { /** * Cast WindowFrame boundaries to the type they operate upon. */ - object WindowFrameCoercion extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { + object WindowFrameCoercion extends TypeCoercionRule { + override protected def coerceTypes( + plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { case s @ WindowSpecDefinition(_, Seq(order), SpecifiedWindowFrame(RangeFrame, lower, upper)) if order.resolved => s.copy(frameSpecification = SpecifiedWindowFrame( @@ -850,3 +878,46 @@ object TypeCoercion { } } } + +trait TypeCoercionRule extends Rule[LogicalPlan] with Logging { + /** + * Applies any changes to [[AttributeReference]] data types that are made by the transform method + * to instances higher in the query tree. 
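// ---------------------------------------------------------------------------
// Editor's sketch (not part of the patch): the new ConcatCoercion/EltCoercion
// rules make the result type configurable when every input is binary
// (illustrative, config names as in the hunks above):
//   SET spark.sql.function.concatBinaryAsString=false;
//   SELECT concat(CAST('a' AS BINARY), CAST('b' AS BINARY));  -- stays binary
//   SELECT concat(CAST('a' AS BINARY), 'b');                  -- string result
// ---------------------------------------------------------------------------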
+ */ + def apply(plan: LogicalPlan): LogicalPlan = { + val newPlan = coerceTypes(plan) + if (plan.fastEquals(newPlan)) { + plan + } else { + propagateTypes(newPlan) + } + } + + protected def coerceTypes(plan: LogicalPlan): LogicalPlan + + private def propagateTypes(plan: LogicalPlan): LogicalPlan = plan transformUp { + // No propagation required for leaf nodes. + case q: LogicalPlan if q.children.isEmpty => q + + // Don't propagate types from unresolved children. + case q: LogicalPlan if !q.childrenResolved => q + + case q: LogicalPlan => + val inputMap = q.inputSet.toSeq.map(a => (a.exprId, a)).toMap + q transformExpressions { + case a: AttributeReference => + inputMap.get(a.exprId) match { + // This can happen when an Attribute reference is born in a non-leaf node, for + // example due to a call to an external script like in the Transform operator. + // TODO: Perhaps those should actually be aliases? + case None => a + // Leave the same if the dataTypes match. + case Some(newType) if a.dataType == newType.dataType => a + case Some(newType) => + logDebug( + s"Promoting $a from ${a.dataType} to ${newType.dataType} in ${q.simpleString}") + newType + } + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala index 04502d04d9509..b55043c270644 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, MonotonicallyIncreasingID} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, CurrentDate, CurrentTimestamp, MonotonicallyIncreasingID} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys import org.apache.spark.sql.catalyst.plans._ @@ -339,6 +339,29 @@ object UnsupportedOperationChecker { } } + def checkForContinuous(plan: LogicalPlan, outputMode: OutputMode): Unit = { + checkForStreaming(plan, outputMode) + + plan.foreachUp { implicit subPlan => + subPlan match { + case (_: Project | _: Filter | _: MapElements | _: MapPartitions | + _: DeserializeToObject | _: SerializeFromObject) => + case node if node.nodeName == "StreamingRelationV2" => + case node => + throwError(s"Continuous processing does not support ${node.nodeName} operations.") + } + + subPlan.expressions.foreach { e => + if (e.collectLeaves().exists { + case (_: CurrentTimestamp | _: CurrentDate) => true + case _ => false + }) { + throwError(s"Continuous processing does not support current time operations.") + } + } + } + } + private def throwErrorIf( condition: Boolean, msg: String)(implicit operator: LogicalPlan): Unit = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/timeZoneAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/timeZoneAnalysis.scala index a27aa845bf0ae..af1f9165b0044 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/timeZoneAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/timeZoneAnalysis.scala @@ -38,7 +38,7 @@ case 
class ResolveTimeZone(conf: SQLConf) extends Rule[LogicalPlan] { } override def apply(plan: LogicalPlan): LogicalPlan = - plan.resolveExpressions(transformTimeZoneExprs) + plan.transformAllExpressions(transformTimeZoneExprs) def resolveTimeZones(e: Expression): Expression = e.transform(transformTimeZoneExprs) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala index ea46dd7282401..20216087b0158 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql.internal.SQLConf * view resolution, in this way, we are able to get the correct view column ordering and * omit the extra columns that we don't require); * 1.2. Else set the child output attributes to `queryOutput`. - * 2. Map the `queryQutput` to view output by index, if the corresponding attributes don't match, + * 2. Map the `queryOutput` to view output by index, if the corresponding attributes don't match, * try to up cast and alias the attribute in `queryOutput` to the attribute in the view output. * 3. Add a Project over the child, with the new output generated by the previous steps. * If the view output doesn't have the same number of columns neither with the child output, nor @@ -48,7 +48,7 @@ import org.apache.spark.sql.internal.SQLConf * completely resolved during the batch of Resolution. */ case class AliasViewChild(conf: SQLConf) extends Rule[LogicalPlan] with CastSupport { - override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { + override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case v @ View(desc, output, child) if child.resolved && output != child.output => val resolver = conf.resolver val queryColumnNames = desc.viewQueryColumnNames diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 7c100afcd738f..59cb26d5e6c36 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -359,12 +359,12 @@ package object dsl { def generate( generator: Generator, - join: Boolean = false, + unrequiredChildIndex: Seq[Int] = Nil, outer: Boolean = false, alias: Option[String] = None, outputNames: Seq[String] = Nil): LogicalPlan = - Generate(generator, join = join, outer = outer, alias, - outputNames.map(UnresolvedAttribute(_)), logicalPlan) + Generate(generator, unrequiredChildIndex, outer, + alias, outputNames.map(UnresolvedAttribute(_)), logicalPlan) def insertInto(tableName: String, overwrite: Boolean = false): LogicalPlan = InsertIntoTable( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala index 65e497afc12cd..d848ba18356d3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala @@ -31,7 +31,7 @@ package org.apache.spark.sql.catalyst.expressions * - [[EqualTo]] and [[EqualNullSafe]] are reordered by `hashCode`. * - Other comparisons ([[GreaterThan]], [[LessThan]]) are reversed by `hashCode`. 
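// ---------------------------------------------------------------------------
// Editor's note (not part of the patch): the Cast.scala hunks below add real
// string rendering for arrays, maps, structs and UDTs instead of falling back
// to Object.toString. Expected results, inferred from the builder code
// (illustrative):
//   CAST(array(1, 2, 3) AS STRING)               -> [1, 2, 3]
//   CAST(map(1, 'a', 2, 'b') AS STRING)          -> [1 -> a, 2 -> b]
//   CAST(named_struct('x', 1, 'y', 2) AS STRING) -> [1, 2]
// ---------------------------------------------------------------------------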
*/ -object Canonicalize extends { +object Canonicalize { def execute(e: Expression): Expression = { expressionReorder(ignoreNamesTypes(e)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 12baddf1bf7ac..a95ebe301b9d1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -181,7 +181,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String TypeCheckResult.TypeCheckSuccess } else { TypeCheckResult.TypeCheckFailure( - s"cannot cast ${child.dataType} to $dataType") + s"cannot cast ${child.dataType.simpleString} to ${dataType.simpleString}") } } @@ -206,6 +206,84 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String case DateType => buildCast[Int](_, d => UTF8String.fromString(DateTimeUtils.dateToString(d))) case TimestampType => buildCast[Long](_, t => UTF8String.fromString(DateTimeUtils.timestampToString(t, timeZone))) + case ArrayType(et, _) => + buildCast[ArrayData](_, array => { + val builder = new UTF8StringBuilder + builder.append("[") + if (array.numElements > 0) { + val toUTF8String = castToString(et) + if (!array.isNullAt(0)) { + builder.append(toUTF8String(array.get(0, et)).asInstanceOf[UTF8String]) + } + var i = 1 + while (i < array.numElements) { + builder.append(",") + if (!array.isNullAt(i)) { + builder.append(" ") + builder.append(toUTF8String(array.get(i, et)).asInstanceOf[UTF8String]) + } + i += 1 + } + } + builder.append("]") + builder.build() + }) + case MapType(kt, vt, _) => + buildCast[MapData](_, map => { + val builder = new UTF8StringBuilder + builder.append("[") + if (map.numElements > 0) { + val keyArray = map.keyArray() + val valueArray = map.valueArray() + val keyToUTF8String = castToString(kt) + val valueToUTF8String = castToString(vt) + builder.append(keyToUTF8String(keyArray.get(0, kt)).asInstanceOf[UTF8String]) + builder.append(" ->") + if (!valueArray.isNullAt(0)) { + builder.append(" ") + builder.append(valueToUTF8String(valueArray.get(0, vt)).asInstanceOf[UTF8String]) + } + var i = 1 + while (i < map.numElements) { + builder.append(", ") + builder.append(keyToUTF8String(keyArray.get(i, kt)).asInstanceOf[UTF8String]) + builder.append(" ->") + if (!valueArray.isNullAt(i)) { + builder.append(" ") + builder.append(valueToUTF8String(valueArray.get(i, vt)) + .asInstanceOf[UTF8String]) + } + i += 1 + } + } + builder.append("]") + builder.build() + }) + case StructType(fields) => + buildCast[InternalRow](_, row => { + val builder = new UTF8StringBuilder + builder.append("[") + if (row.numFields > 0) { + val st = fields.map(_.dataType) + val toUTF8StringFuncs = st.map(castToString) + if (!row.isNullAt(0)) { + builder.append(toUTF8StringFuncs(0)(row.get(0, st(0))).asInstanceOf[UTF8String]) + } + var i = 1 + while (i < row.numFields) { + builder.append(",") + if (!row.isNullAt(i)) { + builder.append(" ") + builder.append(toUTF8StringFuncs(i)(row.get(i, st(i))).asInstanceOf[UTF8String]) + } + i += 1 + } + } + builder.append("]") + builder.build() + }) + case udt: UserDefinedType[_] => + buildCast[Any](_, o => UTF8String.fromString(udt.deserialize(o).toString)) case _ => buildCast[Any](_, o => UTF8String.fromString(o.toString)) } @@ -548,8 +626,8 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String castCode(ctx, eval.value, 
eval.isNull, ev.value, ev.isNull, dataType, nullSafeCast)) } - // three function arguments are: child.primitive, result.primitive and result.isNull - // it returns the code snippets to be put in null safe evaluation region + // The function arguments are: `input`, `result` and `resultIsNull`. We don't need `inputIsNull` + // in parameter list, because the returned code will be put in null safe evaluation region. private[this] type CastFunction = (String, String, String) => String private[this] def nullSafeCastFunction( @@ -584,19 +662,136 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String throw new SparkException(s"Cannot cast $from to $to.") } - // Since we need to cast child expressions recursively inside ComplexTypes, such as Map's + // Since we need to cast input expressions recursively inside ComplexTypes, such as Map's // Key and Value, Struct's field, we need to name out all the variable names involved in a cast. - private[this] def castCode(ctx: CodegenContext, childPrim: String, childNull: String, - resultPrim: String, resultNull: String, resultType: DataType, cast: CastFunction): String = { + private[this] def castCode(ctx: CodegenContext, input: String, inputIsNull: String, + result: String, resultIsNull: String, resultType: DataType, cast: CastFunction): String = { s""" - boolean $resultNull = $childNull; - ${ctx.javaType(resultType)} $resultPrim = ${ctx.defaultValue(resultType)}; - if (!$childNull) { - ${cast(childPrim, resultPrim, resultNull)} + boolean $resultIsNull = $inputIsNull; + ${ctx.javaType(resultType)} $result = ${ctx.defaultValue(resultType)}; + if (!$inputIsNull) { + ${cast(input, result, resultIsNull)} } """ } + private def writeArrayToStringBuilder( + et: DataType, + array: String, + buffer: String, + ctx: CodegenContext): String = { + val elementToStringCode = castToStringCode(et, ctx) + val funcName = ctx.freshName("elementToString") + val elementToStringFunc = ctx.addNewFunction(funcName, + s""" + |private UTF8String $funcName(${ctx.javaType(et)} element) { + | UTF8String elementStr = null; + | ${elementToStringCode("element", "elementStr", null /* resultIsNull won't be used */)} + | return elementStr; + |} + """.stripMargin) + + val loopIndex = ctx.freshName("loopIndex") + s""" + |$buffer.append("["); + |if ($array.numElements() > 0) { + | if (!$array.isNullAt(0)) { + | $buffer.append($elementToStringFunc(${ctx.getValue(array, et, "0")})); + | } + | for (int $loopIndex = 1; $loopIndex < $array.numElements(); $loopIndex++) { + | $buffer.append(","); + | if (!$array.isNullAt($loopIndex)) { + | $buffer.append(" "); + | $buffer.append($elementToStringFunc(${ctx.getValue(array, et, loopIndex)})); + | } + | } + |} + |$buffer.append("]"); + """.stripMargin + } + + private def writeMapToStringBuilder( + kt: DataType, + vt: DataType, + map: String, + buffer: String, + ctx: CodegenContext): String = { + + def dataToStringFunc(func: String, dataType: DataType) = { + val funcName = ctx.freshName(func) + val dataToStringCode = castToStringCode(dataType, ctx) + ctx.addNewFunction(funcName, + s""" + |private UTF8String $funcName(${ctx.javaType(dataType)} data) { + | UTF8String dataStr = null; + | ${dataToStringCode("data", "dataStr", null /* resultIsNull won't be used */)} + | return dataStr; + |} + """.stripMargin) + } + + val keyToStringFunc = dataToStringFunc("keyToString", kt) + val valueToStringFunc = dataToStringFunc("valueToString", vt) + val loopIndex = ctx.freshName("loopIndex") + s""" + |$buffer.append("["); + |if 
($map.numElements() > 0) { + | $buffer.append($keyToStringFunc(${ctx.getValue(s"$map.keyArray()", kt, "0")})); + | $buffer.append(" ->"); + | if (!$map.valueArray().isNullAt(0)) { + | $buffer.append(" "); + | $buffer.append($valueToStringFunc(${ctx.getValue(s"$map.valueArray()", vt, "0")})); + | } + | for (int $loopIndex = 1; $loopIndex < $map.numElements(); $loopIndex++) { + | $buffer.append(", "); + | $buffer.append($keyToStringFunc(${ctx.getValue(s"$map.keyArray()", kt, loopIndex)})); + | $buffer.append(" ->"); + | if (!$map.valueArray().isNullAt($loopIndex)) { + | $buffer.append(" "); + | $buffer.append($valueToStringFunc( + | ${ctx.getValue(s"$map.valueArray()", vt, loopIndex)})); + | } + | } + |} + |$buffer.append("]"); + """.stripMargin + } + + private def writeStructToStringBuilder( + st: Seq[DataType], + row: String, + buffer: String, + ctx: CodegenContext): String = { + val structToStringCode = st.zipWithIndex.map { case (ft, i) => + val fieldToStringCode = castToStringCode(ft, ctx) + val field = ctx.freshName("field") + val fieldStr = ctx.freshName("fieldStr") + s""" + |${if (i != 0) s"""$buffer.append(",");""" else ""} + |if (!$row.isNullAt($i)) { + | ${if (i != 0) s"""$buffer.append(" ");""" else ""} + | + | // Append $i field into the string buffer + | ${ctx.javaType(ft)} $field = ${ctx.getValue(row, ft, s"$i")}; + | UTF8String $fieldStr = null; + | ${fieldToStringCode(field, fieldStr, null /* resultIsNull won't be used */)} + | $buffer.append($fieldStr); + |} + """.stripMargin + } + + val writeStructCode = ctx.splitExpressions( + expressions = structToStringCode, + funcName = "fieldToString", + arguments = ("InternalRow", row) :: (classOf[UTF8StringBuilder].getName, buffer) :: Nil) + + s""" + |$buffer.append("["); + |$writeStructCode + |$buffer.append("]"); + """.stripMargin + } + private[this] def castToStringCode(from: DataType, ctx: CodegenContext): CastFunction = { from match { case BinaryType => @@ -605,9 +800,49 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String (c, evPrim, evNull) => s"""$evPrim = UTF8String.fromString( org.apache.spark.sql.catalyst.util.DateTimeUtils.dateToString($c));""" case TimestampType => - val tz = ctx.addReferenceMinorObj(timeZone) + val tz = ctx.addReferenceObj("timeZone", timeZone) (c, evPrim, evNull) => s"""$evPrim = UTF8String.fromString( org.apache.spark.sql.catalyst.util.DateTimeUtils.timestampToString($c, $tz));""" + case ArrayType(et, _) => + (c, evPrim, evNull) => { + val buffer = ctx.freshName("buffer") + val bufferClass = classOf[UTF8StringBuilder].getName + val writeArrayElemCode = writeArrayToStringBuilder(et, c, buffer, ctx) + s""" + |$bufferClass $buffer = new $bufferClass(); + |$writeArrayElemCode; + |$evPrim = $buffer.build(); + """.stripMargin + } + case MapType(kt, vt, _) => + (c, evPrim, evNull) => { + val buffer = ctx.freshName("buffer") + val bufferClass = classOf[UTF8StringBuilder].getName + val writeMapElemCode = writeMapToStringBuilder(kt, vt, c, buffer, ctx) + s""" + |$bufferClass $buffer = new $bufferClass(); + |$writeMapElemCode; + |$evPrim = $buffer.build(); + """.stripMargin + } + case StructType(fields) => + (c, evPrim, evNull) => { + val row = ctx.freshName("row") + val buffer = ctx.freshName("buffer") + val bufferClass = classOf[UTF8StringBuilder].getName + val writeStructCode = writeStructToStringBuilder(fields.map(_.dataType), row, buffer, ctx) + s""" + |InternalRow $row = $c; + |$bufferClass $buffer = new $bufferClass(); + |$writeStructCode + |$evPrim = $buffer.build(); + 
""".stripMargin + } + case udt: UserDefinedType[_] => + val udtRef = ctx.addReferenceObj("udt", udt) + (c, evPrim, evNull) => { + s"$evPrim = UTF8String.fromString($udtRef.deserialize($c).toString());" + } case _ => (c, evPrim, evNull) => s"$evPrim = UTF8String.fromString(String.valueOf($c));" } @@ -633,7 +868,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String } """ case TimestampType => - val tz = ctx.addReferenceMinorObj(timeZone) + val tz = ctx.addReferenceObj("timeZone", timeZone) (c, evPrim, evNull) => s"$evPrim = org.apache.spark.sql.catalyst.util.DateTimeUtils.millisToDays($c / 1000L, $tz);" case _ => @@ -713,7 +948,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String from: DataType, ctx: CodegenContext): CastFunction = from match { case StringType => - val tz = ctx.addReferenceMinorObj(timeZone) + val tz = ctx.addReferenceObj("timeZone", timeZone) val longOpt = ctx.freshName("longOpt") (c, evPrim, evNull) => s""" @@ -730,7 +965,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String case _: IntegralType => (c, evPrim, evNull) => s"$evPrim = ${longToTimeStampCode(c)};" case DateType => - val tz = ctx.addReferenceMinorObj(timeZone) + val tz = ctx.addReferenceObj("timeZone", timeZone) (c, evPrim, evNull) => s"$evPrim = org.apache.spark.sql.catalyst.util.DateTimeUtils.daysToMillis($c, $tz) * 1000;" case DecimalType() => @@ -799,16 +1034,16 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String private[this] def castToByteCode(from: DataType, ctx: CodegenContext): CastFunction = from match { case StringType => - val wrapper = ctx.freshName("wrapper") - ctx.addMutableState("UTF8String.IntWrapper", wrapper, - s"$wrapper = new UTF8String.IntWrapper();") + val wrapper = ctx.freshName("intWrapper") (c, evPrim, evNull) => s""" + UTF8String.IntWrapper $wrapper = new UTF8String.IntWrapper(); if ($c.toByte($wrapper)) { $evPrim = (byte) $wrapper.value; } else { $evNull = true; } + $wrapper = null; """ case BooleanType => (c, evPrim, evNull) => s"$evPrim = $c ? (byte) 1 : (byte) 0;" @@ -826,16 +1061,16 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String from: DataType, ctx: CodegenContext): CastFunction = from match { case StringType => - val wrapper = ctx.freshName("wrapper") - ctx.addMutableState("UTF8String.IntWrapper", wrapper, - s"$wrapper = new UTF8String.IntWrapper();") + val wrapper = ctx.freshName("intWrapper") (c, evPrim, evNull) => s""" + UTF8String.IntWrapper $wrapper = new UTF8String.IntWrapper(); if ($c.toShort($wrapper)) { $evPrim = (short) $wrapper.value; } else { $evNull = true; } + $wrapper = null; """ case BooleanType => (c, evPrim, evNull) => s"$evPrim = $c ? (short) 1 : (short) 0;" @@ -851,16 +1086,16 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String private[this] def castToIntCode(from: DataType, ctx: CodegenContext): CastFunction = from match { case StringType => - val wrapper = ctx.freshName("wrapper") - ctx.addMutableState("UTF8String.IntWrapper", wrapper, - s"$wrapper = new UTF8String.IntWrapper();") + val wrapper = ctx.freshName("intWrapper") (c, evPrim, evNull) => s""" + UTF8String.IntWrapper $wrapper = new UTF8String.IntWrapper(); if ($c.toInt($wrapper)) { $evPrim = $wrapper.value; } else { $evNull = true; } + $wrapper = null; """ case BooleanType => (c, evPrim, evNull) => s"$evPrim = $c ? 
1 : 0;" @@ -876,17 +1111,17 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String private[this] def castToLongCode(from: DataType, ctx: CodegenContext): CastFunction = from match { case StringType => - val wrapper = ctx.freshName("wrapper") - ctx.addMutableState("UTF8String.LongWrapper", wrapper, - s"$wrapper = new UTF8String.LongWrapper();") + val wrapper = ctx.freshName("longWrapper") (c, evPrim, evNull) => s""" + UTF8String.LongWrapper $wrapper = new UTF8String.LongWrapper(); if ($c.toLong($wrapper)) { $evPrim = $wrapper.value; } else { $evNull = true; } + $wrapper = null; """ case BooleanType => (c, evPrim, evNull) => s"$evPrim = $c ? 1L : 0L;" @@ -1014,8 +1249,8 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String case (fromField, toField) => nullSafeCastFunction(fromField.dataType, toField.dataType, ctx) } val rowClass = classOf[GenericInternalRow].getName - val result = ctx.freshName("result") - val tmpRow = ctx.freshName("tmpRow") + val tmpResult = ctx.freshName("tmpResult") + val tmpInput = ctx.freshName("tmpInput") val fieldsEvalCode = fieldsCasts.zipWithIndex.map { case (cast, i) => val fromFieldPrim = ctx.freshName("ffp") @@ -1024,37 +1259,33 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String val toFieldNull = ctx.freshName("tfn") val fromType = ctx.javaType(from.fields(i).dataType) s""" - boolean $fromFieldNull = $tmpRow.isNullAt($i); + boolean $fromFieldNull = $tmpInput.isNullAt($i); if ($fromFieldNull) { - $result.setNullAt($i); + $tmpResult.setNullAt($i); } else { $fromType $fromFieldPrim = - ${ctx.getValue(tmpRow, from.fields(i).dataType, i.toString)}; + ${ctx.getValue(tmpInput, from.fields(i).dataType, i.toString)}; ${castCode(ctx, fromFieldPrim, fromFieldNull, toFieldPrim, toFieldNull, to.fields(i).dataType, cast)} if ($toFieldNull) { - $result.setNullAt($i); + $tmpResult.setNullAt($i); } else { - ${ctx.setColumn(result, to.fields(i).dataType, i, toFieldPrim)}; + ${ctx.setColumn(tmpResult, to.fields(i).dataType, i, toFieldPrim)}; } } """ } - val fieldsEvalCodes = if (ctx.INPUT_ROW != null && ctx.currentVars == null) { - ctx.splitExpressions( - expressions = fieldsEvalCode, - funcName = "castStruct", - arguments = ("InternalRow", tmpRow) :: (rowClass, result) :: Nil) - } else { - fieldsEvalCode.mkString("\n") - } + val fieldsEvalCodes = ctx.splitExpressions( + expressions = fieldsEvalCode, + funcName = "castStruct", + arguments = ("InternalRow", tmpInput) :: (rowClass, tmpResult) :: Nil) - (c, evPrim, evNull) => + (input, result, resultIsNull) => s""" - final $rowClass $result = new $rowClass(${fieldsCasts.length}); - final InternalRow $tmpRow = $c; + final $rowClass $tmpResult = new $rowClass(${fieldsCasts.length}); + final InternalRow $tmpInput = $input; $fieldsEvalCodes - $evPrim = $result; + $result = $tmpResult; """ } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 743782a6453e9..4568714933095 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -119,8 +119,7 @@ abstract class Expression extends TreeNode[Expression] { // TODO: support whole stage codegen too if (eval.code.trim.length > 1024 && ctx.INPUT_ROW != null && ctx.currentVars == null) { val setIsNull = if (eval.isNull != "false" && 
eval.isNull != "true") { - val globalIsNull = ctx.freshName("globalIsNull") - ctx.addMutableState(ctx.JAVA_BOOLEAN, globalIsNull) + val globalIsNull = ctx.addMutableState(ctx.JAVA_BOOLEAN, "globalIsNull") val localIsNull = eval.isNull eval.isNull = globalIsNull s"$globalIsNull = $localIsNull;" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala index 821d784a01342..11fb579dfa88c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala @@ -65,10 +65,9 @@ case class MonotonicallyIncreasingID() extends LeafExpression with Nondeterminis } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val countTerm = ctx.freshName("count") - val partitionMaskTerm = ctx.freshName("partitionMask") - ctx.addMutableState(ctx.JAVA_LONG, countTerm) - ctx.addMutableState(ctx.JAVA_LONG, partitionMaskTerm) + val countTerm = ctx.addMutableState(ctx.JAVA_LONG, "count") + val partitionMaskTerm = "partitionMask" + ctx.addImmutableStateIfNotExists(ctx.JAVA_LONG, partitionMaskTerm) ctx.addPartitionInitializationStatement(s"$countTerm = 0L;") ctx.addPartitionInitializationStatement(s"$partitionMaskTerm = ((long) partitionIndex) << 33;") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala index 179853032035e..388ef42883ad3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala @@ -76,11 +76,6 @@ case class ScalaUDF( }.foreach(println) */ - - // Accessors used in genCode - def userDefinedFunc(): AnyRef = function - def getChildren(): Seq[Expression] = children - private[this] val f = children.size match { case 0 => val func = function.asInstanceOf[() => Any] @@ -981,50 +976,19 @@ case class ScalaUDF( } // scalastyle:on line.size.limit - - // Generate codes used to convert the arguments to Scala type for user-defined functions - private[this] def genCodeForConverter(ctx: CodegenContext, index: Int): String = { - val converterClassName = classOf[Any => Any].getName - val typeConvertersClassName = CatalystTypeConverters.getClass.getName + ".MODULE$" - val expressionClassName = classOf[Expression].getName - val scalaUDFClassName = classOf[ScalaUDF].getName - - val converterTerm = ctx.freshName("converter") - val expressionIdx = ctx.references.size - 1 - ctx.addMutableState(converterClassName, converterTerm, - s"$converterTerm = ($converterClassName)$typeConvertersClassName" + - s".createToScalaConverter(((${expressionClassName})((($scalaUDFClassName)" + - s"references[$expressionIdx]).getChildren().apply($index))).dataType());") - converterTerm - } - override def doGenCode( ctx: CodegenContext, ev: ExprCode): ExprCode = { - - val scalaUDF = ctx.addReferenceObj("scalaUDF", this) val converterClassName = classOf[Any => Any].getName - val typeConvertersClassName = CatalystTypeConverters.getClass.getName + ".MODULE$" - - // Generate codes used to convert the returned value of user-defined functions to Catalyst type - val catalystConverterTerm = ctx.freshName("catalystConverter") - ctx.addMutableState(converterClassName, 
catalystConverterTerm, - s"$catalystConverterTerm = ($converterClassName)$typeConvertersClassName" + - s".createToCatalystConverter($scalaUDF.dataType());") + // The type converters for inputs and the result. + val converters: Array[Any => Any] = children.map { c => + CatalystTypeConverters.createToScalaConverter(c.dataType) + }.toArray :+ CatalystTypeConverters.createToCatalystConverter(dataType) + val convertersTerm = ctx.addReferenceObj("converters", converters, s"$converterClassName[]") + val errorMsgTerm = ctx.addReferenceObj("errMsg", udfErrorMessage) val resultTerm = ctx.freshName("result") - // This must be called before children expressions' codegen - // because ctx.references is used in genCodeForConverter - val converterTerms = children.indices.map(genCodeForConverter(ctx, _)) - - // Initialize user-defined function - val funcClassName = s"scala.Function${children.size}" - - val funcTerm = ctx.freshName("udf") - ctx.addMutableState(funcClassName, funcTerm, - s"$funcTerm = ($funcClassName)$scalaUDF.userDefinedFunc();") - // codegen for children expressions val evals = children.map(_.genCode(ctx)) @@ -1032,38 +996,42 @@ case class ScalaUDF( // We need to get the boxedType of dataType's javaType here. Because for the dataType // such as IntegerType, its javaType is `int` and the returned type of user-defined // function is Object. Trying to convert an Object to `int` will cause casting exception. - val evalCode = evals.map(_.code).mkString - val (converters, funcArguments) = converterTerms.zipWithIndex.map { case (converter, i) => - val eval = evals(i) + val evalCode = evals.map(_.code).mkString("\n") + val (funcArgs, initArgs) = evals.zipWithIndex.map { case (eval, i) => val argTerm = ctx.freshName("arg") - val convert = s"Object $argTerm = ${eval.isNull} ? null : $converter.apply(${eval.value});" - (convert, argTerm) + val convert = s"$convertersTerm[$i].apply(${eval.value})" + val initArg = s"Object $argTerm = ${eval.isNull} ? 
null : $convert;" + (argTerm, initArg) }.unzip - val getFuncResult = s"$funcTerm.apply(${funcArguments.mkString(", ")})" + val udf = ctx.addReferenceObj("udf", function, s"scala.Function${children.length}") + val getFuncResult = s"$udf.apply(${funcArgs.mkString(", ")})" + val resultConverter = s"$convertersTerm[${children.length}]" val callFunc = s""" - ${ctx.boxedType(dataType)} $resultTerm = null; - try { - $resultTerm = (${ctx.boxedType(dataType)})$catalystConverterTerm.apply($getFuncResult); - } catch (Exception e) { - throw new org.apache.spark.SparkException($scalaUDF.udfErrorMessage(), e); - } - """ - - ev.copy(code = s""" - $evalCode - ${converters.mkString("\n")} - $callFunc + |${ctx.boxedType(dataType)} $resultTerm = null; + |try { + | $resultTerm = (${ctx.boxedType(dataType)})$resultConverter.apply($getFuncResult); + |} catch (Exception e) { + | throw new org.apache.spark.SparkException($errorMsgTerm, e); + |} + """.stripMargin - boolean ${ev.isNull} = $resultTerm == null; - ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; - if (!${ev.isNull}) { - ${ev.value} = $resultTerm; - }""") + ev.copy(code = + s""" + |$evalCode + |${initArgs.mkString("\n")} + |$callFunc + | + |boolean ${ev.isNull} = $resultTerm == null; + |${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; + |if (!${ev.isNull}) { + | ${ev.value} = $resultTerm; + |} + """.stripMargin) } - private[this] val converter = CatalystTypeConverters.createToCatalystConverter(dataType) + private[this] val resultConverter = CatalystTypeConverters.createToCatalystConverter(dataType) lazy val udfErrorMessage = { val funcCls = function.getClass.getSimpleName @@ -1079,6 +1047,6 @@ case class ScalaUDF( throw new SparkException(udfErrorMessage, e) } - converter(result) + resultConverter(result) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala index 4fa18d6b3209b..a160b9b275290 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala @@ -43,8 +43,8 @@ case class SparkPartitionID() extends LeafExpression with Nondeterministic { override protected def evalInternal(input: InternalRow): Int = partitionId override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val idTerm = ctx.freshName("partitionId") - ctx.addMutableState(ctx.JAVA_INT, idTerm) + val idTerm = "partitionId" + ctx.addImmutableStateIfNotExists(ctx.JAVA_INT, idTerm) ctx.addPartitionInitializationStatement(s"$idTerm = partitionIndex;") ev.copy(code = s"final ${ctx.javaType(dataType)} ${ev.value} = $idTerm;", isNull = "false") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala index 7facb9dad9a76..a45854a3b5146 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala @@ -132,7 +132,7 @@ case class ApproximatePercentile( case TimestampType => value.asInstanceOf[Long].toDouble case n: NumericType => n.numeric.toDouble(value.asInstanceOf[n.InternalType]) case other: DataType => 
- throw new UnsupportedOperationException(s"Unexpected data type $other") + throw new UnsupportedOperationException(s"Unexpected data type ${other.simpleString}") } buffer.add(doubleValue) } @@ -157,7 +157,7 @@ case class ApproximatePercentile( case DoubleType => doubleResult case _: DecimalType => doubleResult.map(Decimal(_)) case other: DataType => - throw new UnsupportedOperationException(s"Unexpected data type $other") + throw new UnsupportedOperationException(s"Unexpected data type ${other.simpleString}") } if (result.length == 0) { null @@ -296,8 +296,8 @@ object ApproximatePercentile { Ints.BYTES + Doubles.BYTES + Longs.BYTES + // length of summary.sampled Ints.BYTES + - // summary.sampled, Array[Stat(value: Double, g: Int, delta: Int)] - summaries.sampled.length * (Doubles.BYTES + Ints.BYTES + Ints.BYTES) + // summary.sampled, Array[Stat(value: Double, g: Long, delta: Long)] + summaries.sampled.length * (Doubles.BYTES + Longs.BYTES + Longs.BYTES) } final def serialize(obj: PercentileDigest): Array[Byte] = { @@ -312,8 +312,8 @@ object ApproximatePercentile { while (i < summary.sampled.length) { val stat = summary.sampled(i) buffer.putDouble(stat.value) - buffer.putInt(stat.g) - buffer.putInt(stat.delta) + buffer.putLong(stat.g) + buffer.putLong(stat.delta) i += 1 } buffer.array() @@ -330,8 +330,8 @@ object ApproximatePercentile { var i = 0 while (i < sampledLength) { val value = buffer.getDouble() - val g = buffer.getInt() - val delta = buffer.getInt() + val g = buffer.getLong() + val delta = buffer.getLong() sampled(i) = Stats(value, g, delta) i += 1 } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index d98f7b3d8efe6..8bb14598a6d7b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -602,23 +602,36 @@ case class Least(children: Seq[Expression]) extends Expression { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val evalChildren = children.map(_.genCode(ctx)) - ctx.addMutableState(ctx.JAVA_BOOLEAN, ev.isNull) - ctx.addMutableState(ctx.javaType(dataType), ev.value) - def updateEval(eval: ExprCode): String = { + ev.isNull = ctx.addMutableState(ctx.JAVA_BOOLEAN, ev.isNull) + val evals = evalChildren.map(eval => s""" - ${eval.code} - if (!${eval.isNull} && (${ev.isNull} || - ${ctx.genGreater(dataType, ev.value, eval.value)})) { - ${ev.isNull} = false; - ${ev.value} = ${eval.value}; - } - """ - } - val codes = ctx.splitExpressions(evalChildren.map(updateEval)) - ev.copy(code = s""" - ${ev.isNull} = true; - ${ev.value} = ${ctx.defaultValue(dataType)}; - $codes""") + |${eval.code} + |if (!${eval.isNull} && (${ev.isNull} || + | ${ctx.genGreater(dataType, ev.value, eval.value)})) { + | ${ev.isNull} = false; + | ${ev.value} = ${eval.value}; + |} + """.stripMargin + ) + + val resultType = ctx.javaType(dataType) + val codes = ctx.splitExpressionsWithCurrentInputs( + expressions = evals, + funcName = "least", + extraArguments = Seq(resultType -> ev.value), + returnType = resultType, + makeSplitFunction = body => + s""" + |$body + |return ${ev.value}; + """.stripMargin, + foldFunctions = _.map(funcCall => s"${ev.value} = $funcCall;").mkString("\n")) + ev.copy(code = + s""" + |${ev.isNull} = true; + |${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; + |$codes + 
""".stripMargin) } } @@ -668,22 +681,35 @@ case class Greatest(children: Seq[Expression]) extends Expression { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val evalChildren = children.map(_.genCode(ctx)) - ctx.addMutableState(ctx.JAVA_BOOLEAN, ev.isNull) - ctx.addMutableState(ctx.javaType(dataType), ev.value) - def updateEval(eval: ExprCode): String = { + ev.isNull = ctx.addMutableState(ctx.JAVA_BOOLEAN, ev.isNull) + val evals = evalChildren.map(eval => s""" - ${eval.code} - if (!${eval.isNull} && (${ev.isNull} || - ${ctx.genGreater(dataType, eval.value, ev.value)})) { - ${ev.isNull} = false; - ${ev.value} = ${eval.value}; - } - """ - } - val codes = ctx.splitExpressions(evalChildren.map(updateEval)) - ev.copy(code = s""" - ${ev.isNull} = true; - ${ev.value} = ${ctx.defaultValue(dataType)}; - $codes""") + |${eval.code} + |if (!${eval.isNull} && (${ev.isNull} || + | ${ctx.genGreater(dataType, eval.value, ev.value)})) { + | ${ev.isNull} = false; + | ${ev.value} = ${eval.value}; + |} + """.stripMargin + ) + + val resultType = ctx.javaType(dataType) + val codes = ctx.splitExpressionsWithCurrentInputs( + expressions = evals, + funcName = "greatest", + extraArguments = Seq(resultType -> ev.value), + returnType = resultType, + makeSplitFunction = body => + s""" + |$body + |return ${ev.value}; + """.stripMargin, + foldFunctions = _.map(funcCall => s"${ev.value} = $funcCall;").mkString("\n")) + ev.copy(code = + s""" + |${ev.isNull} = true; + |${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; + |$codes + """.stripMargin) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 668c816b3fd8d..2c714c228e6c9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -29,7 +29,7 @@ import scala.util.control.NonFatal import com.google.common.cache.{CacheBuilder, CacheLoader} import com.google.common.util.concurrent.{ExecutionError, UncheckedExecutionException} import org.codehaus.commons.compiler.CompileException -import org.codehaus.janino.{ByteArrayClassLoader, ClassBodyEvaluator, JaninoRuntimeException, SimpleCompiler} +import org.codehaus.janino.{ByteArrayClassLoader, ClassBodyEvaluator, InternalCompilerException, SimpleCompiler} import org.codehaus.janino.util.ClassFile import org.apache.spark.{SparkEnv, TaskContext, TaskKilledException} @@ -109,28 +109,14 @@ class CodegenContext { * * Returns the code to access it. * - * This is for minor objects not to store the object into field but refer it from the references - * field at the time of use because number of fields in class is limited so we should reduce it. + * This does not to store the object into field but refer it from the references field at the + * time of use because number of fields in class is limited so we should reduce it. */ - def addReferenceMinorObj(obj: Any, className: String = null): String = { + def addReferenceObj(objName: String, obj: Any, className: String = null): String = { val idx = references.length references += obj val clsName = Option(className).getOrElse(obj.getClass.getName) - s"(($clsName) references[$idx])" - } - - /** - * Add an object to `references`, create a class member to access it. - * - * Returns the name of class member. 
-   */
-  def addReferenceObj(name: String, obj: Any, className: String = null): String = {
-    val term = freshName(name)
-    val idx = references.length
-    references += obj
-    val clsName = Option(className).getOrElse(obj.getClass.getName)
-    addMutableState(clsName, term, s"$term = ($clsName) references[$idx];")
-    term
+    s"(($clsName) references[$idx] /* $objName */)"
   }
 
   /**
@@ -142,7 +128,7 @@ class CodegenContext {
    * `currentVars` to null, or set `currentVars(i)` to null for certain columns, before calling
    * `Expression.genCode`.
    */
-  final var INPUT_ROW = "i"
+  var INPUT_ROW = "i"
 
   /**
    * Holding a list of generated columns as input of current operator, will be used by
@@ -151,22 +137,83 @@ class CodegenContext {
   var currentVars: Seq[ExprCode] = null
 
   /**
-   * Holding expressions' mutable states like `MonotonicallyIncreasingID.count` as a
-   * 3-tuple: java type, variable name, code to init it.
-   * As an example, ("int", "count", "count = 0;") will produce code:
+   * Holding expressions' inlined mutable states like `MonotonicallyIncreasingID.count` as a
+   * 2-tuple: java type, variable name.
+   * As an example, ("int", "count") will produce code:
    * {{{
    *   private int count;
    * }}}
-   * as a member variable, and add
-   * {{{
-   *   count = 0;
-   * }}}
-   * to the constructor.
+   * as a member variable.
    *
    * They will be kept as member variables in generated classes like `SpecificProjection`.
+   *
+   * Exposed for tests only.
+   */
+  private[catalyst] val inlinedMutableStates: mutable.ArrayBuffer[(String, String)] =
+    mutable.ArrayBuffer.empty[(String, String)]
+
+  /**
+   * The mapping between mutable state types and corresponding compacted arrays.
+   * The keys are java type strings. The values are [[MutableStateArrays]], which encapsulate
+   * the compacted arrays for the mutable states with the same java type.
+   *
+   * Exposed for tests only.
+   */
+  private[catalyst] val arrayCompactedMutableStates: mutable.Map[String, MutableStateArrays] =
+    mutable.Map.empty[String, MutableStateArrays]
+
+  // An array that holds the code that will initialize each state.
+  // Exposed for tests only.
+  private[catalyst] val mutableStateInitCode: mutable.ArrayBuffer[String] =
+    mutable.ArrayBuffer.empty[String]
+
+  // Tracks the names of all the mutable states.
+  private val mutableStateNames: mutable.HashSet[String] = mutable.HashSet.empty
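
Illustration (not part of the patch): with the two collections above, a generated class
declares inlined states as individual fields and compacted states as per-type arrays. A
minimal, self-contained sketch of the declarations this produces, assuming one inlined
long state and two int states compacted into one array (all names are illustrative):

    object CompactedStateDemo {
      def main(args: Array[String]): Unit = {
        val inlined = Seq(("long", "count"))               // (java type, field name)
        val arrays = Seq(("int", "mutableStateArray", 2))  // (java type, array name, slots used)
        val decls =
          inlined.map { case (t, n) => s"private $t $n;" } ++
            arrays.map { case (t, name, len) => s"private $t[] $name = new $t[$len];" }
        decls.foreach(println)
        // prints:
        //   private long count;
        //   private int[] mutableStateArray = new int[2];
      }
    }
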
+
+  /**
+   * This class holds the names of the arrays used for compacting mutable states of a certain
+   * type, plus the next available slot of the current compacted array.
    */
-  val mutableStates: mutable.ArrayBuffer[(String, String, String)] =
-    mutable.ArrayBuffer.empty[(String, String, String)]
+  class MutableStateArrays {
+    val arrayNames = mutable.ListBuffer.empty[String]
+    createNewArray()
+
+    private[this] var currentIndex = 0
+
+    private def createNewArray() = {
+      val newArrayName = freshName("mutableStateArray")
+      mutableStateNames += newArrayName
+      arrayNames.append(newArrayName)
+    }
+
+    def getCurrentIndex: Int = currentIndex
+
+    /**
+     * Returns a reference to the next available slot in the current compacted array. The size of
+     * each compacted array is controlled by the constant
+     * `CodeGenerator.MUTABLESTATEARRAY_SIZE_LIMIT`. Once the threshold is reached, a new
+     * compacted array is created.
+     */
+    def getNextSlot(): String = {
+      if (currentIndex < CodeGenerator.MUTABLESTATEARRAY_SIZE_LIMIT) {
+        val res = s"${arrayNames.last}[$currentIndex]"
+        currentIndex += 1
+        res
+      } else {
+        createNewArray()
+        currentIndex = 1
+        s"${arrayNames.last}[0]"
+      }
+    }
+
+  }
+
+  /**
+   * A map containing the mutable states which have been defined so far using
+   * `addImmutableStateIfNotExists`. Each entry contains the name of the mutable state as key and
+   * its Java type and init code as value.
+   */
+  private val immutableStates: mutable.Map[String, (String, String)] =
+    mutable.Map.empty[String, (String, String)]
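
Illustration (not part of the patch): the slot-allocation logic of getNextSlot() can be
modeled outside of codegen. A minimal sketch, using a limit of 2 slots per array in place
of CodeGenerator.MUTABLESTATEARRAY_SIZE_LIMIT (all names are illustrative):

    object SlotDemo {
      final val Limit = 2  // stands in for CodeGenerator.MUTABLESTATEARRAY_SIZE_LIMIT

      class Arrays {
        val arrayNames = scala.collection.mutable.ListBuffer("arr0")
        private var currentIndex = 0
        def getNextSlot(): String = {
          if (currentIndex < Limit) {
            val res = s"${arrayNames.last}[$currentIndex]"
            currentIndex += 1
            res
          } else {
            arrayNames += s"arr${arrayNames.size}"  // start a fresh array once full
            currentIndex = 1
            s"${arrayNames.last}[0]"
          }
        }
      }

      def main(args: Array[String]): Unit = {
        val a = new Arrays
        println(Seq.fill(5)(a.getNextSlot()).mkString(", "))
        // arr0[0], arr0[1], arr1[0], arr1[1], arr2[0]
      }
    }
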
 
   /**
    * Add a mutable state as a field to the generated class. c.f. the comments above.
@@ -177,11 +224,85 @@ class CodegenContext {
    * the list of default imports available.
    * Also, generic type arguments are accepted but ignored.
    * @param variableName Name of the field.
-   * @param initCode The statement(s) to put into the init() method to initialize this field.
+   * @param initFunc Function that generates the statement(s) to put into the init() method to
+   *                 initialize this field. The argument is the name of the mutable state variable.
    *                 If left blank, the field will be default-initialized.
+   * @param forceInline whether the declaration and initialization code may be inlined rather than
+   *                    compacted. Set forceInline to `true` when one of the following holds:
+   *                      1. the original name of the state must be kept;
+   *                      2. the state is expected to be generated only rarely
+   *                         (e.g. there are not many sort operators in one stage).
+   * @param useFreshName If this is false and the mutable state ends up being inlined in the outer
+   *                     class, the name is not changed.
+   * @return the name of the mutable state variable, which is the original name or fresh name if
+   *         the variable is inlined to the outer class, or an array access if the variable is to
+   *         be stored in an array of variables of the same type.
+   *         A variable will be inlined into the outer class when one of the following conditions
+   *         is satisfied:
+   *         1. forceInline is true
+   *         2. its type is primitive type and the total number of the inlined mutable variables
+   *            is less than `CodeGenerator.OUTER_CLASS_VARIABLES_THRESHOLD`
+   *         3. its type is multi-dimensional array
+   *         When a variable is compacted into an array, the max size of the array for compaction
+   *         is given by `CodeGenerator.MUTABLESTATEARRAY_SIZE_LIMIT`.
    */
-  def addMutableState(javaType: String, variableName: String, initCode: String = ""): Unit = {
-    mutableStates += ((javaType, variableName, initCode))
+  def addMutableState(
+      javaType: String,
+      variableName: String,
+      initFunc: String => String = _ => "",
+      forceInline: Boolean = false,
+      useFreshName: Boolean = true): String = {
+
+    // We want to put a primitive-type variable at the outer class for performance.
+    val canInlinePrimitive = isPrimitiveType(javaType) &&
+      (inlinedMutableStates.length < CodeGenerator.OUTER_CLASS_VARIABLES_THRESHOLD)
+    if (forceInline || canInlinePrimitive || javaType.contains("[][]")) {
+      val varName = if (useFreshName) freshName(variableName) else variableName
+      val initCode = initFunc(varName)
+      inlinedMutableStates += ((javaType, varName))
+      mutableStateInitCode += initCode
+      mutableStateNames += varName
+      varName
+    } else {
+      val arrays = arrayCompactedMutableStates.getOrElseUpdate(javaType, new MutableStateArrays)
+      val element = arrays.getNextSlot()
+
+      val initCode = initFunc(element)
+      mutableStateInitCode += initCode
+      element
+    }
+  }
+
+  /**
+   * Add an immutable state as a field to the generated class, but only if a field with that name
+   * does not exist yet. This helps reduce the number of the generated class' fields, since the
+   * same variable can be reused by many functions.
+   *
+   * Even though the added variables are not declared as final, they should never be reassigned in
+   * the generated code to prevent errors and unexpected behaviors.
+   *
+   * Internally, this method calls `addMutableState`.
+   *
+   * @param javaType Java type of the field.
+   * @param variableName Name of the field.
+   * @param initFunc Function that generates the statement(s) to put into the init() method to
+   *                 initialize this field. The argument is the name of the mutable state variable.
+   */
+  def addImmutableStateIfNotExists(
+      javaType: String,
+      variableName: String,
+      initFunc: String => String = _ => ""): Unit = {
+    val existingImmutableState = immutableStates.get(variableName)
+    if (existingImmutableState.isEmpty) {
+      addMutableState(javaType, variableName, initFunc, useFreshName = false, forceInline = true)
+      immutableStates(variableName) = (javaType, initFunc(variableName))
+    } else {
+      val (prevJavaType, prevInitCode) = existingImmutableState.get
+      assert(prevJavaType == javaType, s"$variableName has already been defined with type " +
+        s"$prevJavaType and now an attempt is made to define it again with type $javaType.")
+      assert(prevInitCode == initFunc(variableName), s"$variableName has already been defined " +
+        s"with different initialization statements.")
+    }
   }
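
Illustration (not part of the patch): the intended usage pattern for
addImmutableStateIfNotExists, mirroring the SparkPartitionID hunk later in this diff.
This sketch assumes the patch is applied and spark-catalyst is on the classpath; the
helper name is hypothetical:

    import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext

    // Every expression that calls this shares the single "partitionId" field;
    // repeated calls are no-ops instead of minting "partitionId1", "partitionId2", ...
    def genPartitionId(ctx: CodegenContext): String = {
      val idTerm = "partitionId"
      ctx.addImmutableStateIfNotExists(ctx.JAVA_INT, idTerm)
      ctx.addPartitionInitializationStatement(s"$idTerm = partitionIndex;")
      idTerm
    }
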
 
   /**
@@ -190,8 +311,7 @@ class CodegenContext {
    * data types like: UTF8String, ArrayData, MapData & InternalRow.
    */
   def addBufferedState(dataType: DataType, variableName: String, initCode: String): ExprCode = {
-    val value = freshName(variableName)
-    addMutableState(javaType(dataType), value, "")
+    val value = addMutableState(javaType(dataType), variableName)
     val code = dataType match {
       case StringType => s"$value = $initCode.clone();"
       case _: StructType | _: ArrayType | _: MapType => s"$value = $initCode.copy();"
@@ -203,15 +323,37 @@ class CodegenContext {
 
   def declareMutableStates(): String = {
     // It's possible that we add same mutable state twice, e.g. the `mergeExpressions` in
     // `TypedAggregateExpression`, we should call `distinct` here to remove the duplicated ones.
-    mutableStates.distinct.map { case (javaType, variableName, _) =>
+    val inlinedStates = inlinedMutableStates.distinct.map { case (javaType, variableName) =>
       s"private $javaType $variableName;"
-    }.mkString("\n")
+    }
+
+    val arrayStates = arrayCompactedMutableStates.flatMap { case (javaType, mutableStateArrays) =>
+      val numArrays = mutableStateArrays.arrayNames.size
+      mutableStateArrays.arrayNames.zipWithIndex.map { case (arrayName, index) =>
+        val length = if (index + 1 == numArrays) {
+          mutableStateArrays.getCurrentIndex
+        } else {
+          CodeGenerator.MUTABLESTATEARRAY_SIZE_LIMIT
+        }
+        if (javaType.contains("[]")) {
+          // initializer had a one-dimensional array variable
+          val baseType = javaType.substring(0, javaType.length - 2)
+          s"private $javaType[] $arrayName = new $baseType[$length][];"
+        } else {
+          // initializer had a scalar variable
+          s"private $javaType[] $arrayName = new $javaType[$length];"
+        }
+      }
+    }
+
+    (inlinedStates ++ arrayStates).mkString("\n")
   }
 
   def initMutableStates(): String = {
     // It's possible that we add same mutable state twice, e.g. the `mergeExpressions` in
     // `TypedAggregateExpression`, we should call `distinct` here to remove the duplicated ones.
-    val initCodes = mutableStates.distinct.map(_._3 + "\n")
+    val initCodes = mutableStateInitCode.distinct.map(_ + "\n")
+
     // The generated initialization code may exceed 64kb function size limit in JVM if there are too
     // many mutable states, so split it into multiple functions.
     splitExpressions(expressions = initCodes, funcName = "init", arguments = Nil)
@@ -781,23 +923,45 @@ class CodegenContext {
    * beyond 1000kb, we declare a private, inner sub-class, and the function is inlined to it
    * instead, because classes have a constant pool limit of 65,536 named values.
    *
-   * Note that we will extract the current inputs of this context and pass them to the generated
-   * functions. The input is `INPUT_ROW` for normal codegen path, and `currentVars` for whole
-   * stage codegen path. Whole stage codegen path is not supported yet.
+   * Note that, different from `splitExpressions`, this method extracts the current inputs of this
+   * context and passes them to the generated functions. The input is `INPUT_ROW` for the normal
+   * codegen path, and `currentVars` for the whole-stage codegen path. The whole-stage codegen
+   * path is not supported yet.
    *
    * @param expressions the codes to evaluate expressions.
+   * @param funcName the split function name base.
+   * @param extraArguments the list of (type, name) of the arguments of the split function,
+   *                       except for the current inputs like `ctx.INPUT_ROW`.
+   * @param returnType the return type of the split function.
+   * @param makeSplitFunction makes the split function body, e.g. adding preparation or cleanup.
+   * @param foldFunctions folds the split function calls.
    */
-  def splitExpressions(expressions: Seq[String]): String = {
+  def splitExpressionsWithCurrentInputs(
+      expressions: Seq[String],
+      funcName: String = "apply",
+      extraArguments: Seq[(String, String)] = Nil,
+      returnType: String = "void",
+      makeSplitFunction: String => String = identity,
+      foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = {
     // TODO: support whole stage codegen
     if (INPUT_ROW == null || currentVars != null) {
-      return expressions.mkString("\n")
+      expressions.mkString("\n")
+    } else {
+      splitExpressions(
+        expressions,
+        funcName,
+        ("InternalRow", INPUT_ROW) +: extraArguments,
+        returnType,
+        makeSplitFunction,
+        foldFunctions)
     }
-    splitExpressions(expressions, funcName = "apply", arguments = ("InternalRow", INPUT_ROW) :: Nil)
   }
 
   /**
    * Splits the generated code of expressions into multiple functions, because function has
-   * 64kb code size limit in JVM
+   * 64kb code size limit in JVM. If the class to which the function would be inlined would grow
+   * beyond 1000kb, we declare a private, inner sub-class, and the function is inlined to it
+   * instead, because classes have a constant pool limit of 65,536 named values.
    *
    * @param expressions the codes to evaluate expressions.
    * @param funcName the split function name base.
@@ -819,6 +983,15 @@ class CodegenContext {
       // inline execution if only one block
       blocks.head
     } else {
+      if (Utils.isTesting) {
+        // Passing global variables to the split method is dangerous, as any mutation of them is
+        // ignored and may lead to unexpected behavior.
+        arguments.foreach { case (_, name) =>
+          assert(!mutableStateNames.contains(name),
+            s"split function argument $name cannot be a global variable.")
+        }
+      }
+
       val func = freshName(funcName)
       val argString = arguments.map { case (t, name) => s"$t $name" }.mkString(", ")
       val functions = blocks.zipWithIndex.map { case (body, i) =>
@@ -854,7 +1027,7 @@ class CodegenContext {
    *
    * @param expressions the codes to evaluate expressions.
    */
-  def buildCodeBlocks(expressions: Seq[String]): Seq[String] = {
+  private def buildCodeBlocks(expressions: Seq[String]): Seq[String] = {
     val blocks = new ArrayBuffer[String]()
     val blockBuilder = new StringBuilder()
     var length = 0
@@ -1003,9 +1176,9 @@ class CodegenContext {
     val commonExprs = equivalentExpressions.getAllEquivalentExprs.filter(_.size > 1)
     commonExprs.foreach { e =>
       val expr = e.head
-      val fnName = freshName("evalExpr")
-      val isNull = s"${fnName}IsNull"
-      val value = s"${fnName}Value"
+      val fnName = freshName("subExpr")
+      val isNull = addMutableState(JAVA_BOOLEAN, "subExprIsNull")
+      val value = addMutableState(javaType(expr.dataType), "subExprValue")
 
       // Generate the code for this expression tree and wrap it in a function.
       val eval = expr.genCode(this)
@@ -1031,9 +1204,6 @@ class CodegenContext {
       // 2. Less code.
       // Currently, we will do this for all non-leaf only expression trees (i.e. expr trees with
       // at least two nodes) as the cost of doing it is expected to be low.
-      addMutableState(JAVA_BOOLEAN, isNull, s"$isNull = false;")
-      addMutableState(javaType(expr.dataType), value,
-        s"$value = ${defaultValue(expr.dataType)};")
 
       subexprFunctions += s"${addNewFunction(fnName, fn)}($INPUT_ROW);"
       val state = SubExprEliminationState(isNull, value)
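
Illustration (not part of the patch): the splitting idea in miniature — group statement
snippets into bounded blocks, wrap each block in a helper method, and fold the calls. The
60-character threshold and all names here are illustrative, not Spark's actual limits:

    object SplitDemo {
      def split(stmts: Seq[String], maxLen: Int): String = {
        // group statements so no block exceeds maxLen characters
        val blocks = stmts.foldLeft(Vector(Vector.empty[String])) { (acc, s) =>
          if (acc.last.nonEmpty && acc.last.map(_.length).sum + s.length > maxLen) {
            acc :+ Vector(s)
          } else {
            acc.init :+ (acc.last :+ s)
          }
        }
        // one helper per block, then the folded sequence of calls
        val funcs = blocks.zipWithIndex.map { case (b, i) =>
          s"private void apply_$i(InternalRow i) {\n  ${b.mkString("\n  ")}\n}"
        }
        val calls = blocks.indices.map(i => s"apply_$i(i);")
        (funcs ++ calls).mkString("\n")
      }

      def main(args: Array[String]): Unit =
        println(split(Seq.tabulate(4)(i => s"value_$i = compute_$i(i);"), maxLen = 60))
    }
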
@@ -1157,6 +1327,15 @@ object CodeGenerator extends Logging {
   // class.
   val GENERATED_CLASS_SIZE_THRESHOLD = 1000000
 
+  // This is the threshold for the number of global variables of primitive or complex type
+  // (e.g. a multi-dimensional array) that will be placed in the outer class.
+  val OUTER_CLASS_VARIABLES_THRESHOLD = 10000
+
+  // This is the maximum number of array elements used to keep global variables in one Java array.
+  // 32767 is the maximum integer value that does not require a constant pool entry in a Java
+  // bytecode instruction.
+  val MUTABLESTATEARRAY_SIZE_LIMIT = 32768
+
   /**
    * Compile the Java source code into a Java class, using Janino.
    *
@@ -1218,12 +1397,12 @@ object CodeGenerator extends Logging {
       evaluator.cook("generated.java", code.body)
       updateAndGetCompilationStats(evaluator)
     } catch {
-      case e: JaninoRuntimeException =>
+      case e: InternalCompilerException =>
         val msg = s"failed to compile: $e"
         logError(msg, e)
         val maxLines = SQLConf.get.loggingMaxLinesForCodegen
         logInfo(s"\n${CodeFormatter.format(code, maxLines)}")
-        throw new JaninoRuntimeException(msg, e)
+        throw new InternalCompilerException(msg, e)
       case e: CompileException =>
         val msg = s"failed to compile: $e"
         logError(msg, e)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
index 5fdbda51b4ad1..b53c0087e7e2d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
@@ -57,42 +57,38 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP
       case _ => true
     }.unzip
     val exprVals = ctx.generateExpressions(validExpr, useSubexprElimination)
-    val projectionCodes = exprVals.zip(index).map {
+
+    // 4-tuples: (code for projection, isNull variable name, value variable name, column index)
+    val projectionCodes: Seq[(String, String, String, Int)] = exprVals.zip(index).map {
       case (ev, i) =>
         val e = expressions(i)
+        val value = ctx.addMutableState(ctx.javaType(e.dataType), "value")
         if (e.nullable) {
-          val isNull = s"isNull_$i"
-          val value = s"value_$i"
-          ctx.addMutableState(ctx.JAVA_BOOLEAN, isNull, s"$isNull = true;")
-          ctx.addMutableState(ctx.javaType(e.dataType), value,
-            s"$value = ${ctx.defaultValue(e.dataType)};")
-          s"""
-            ${ev.code}
-            $isNull = ${ev.isNull};
-            $value = ${ev.value};
-          """
+          val isNull = ctx.addMutableState(ctx.JAVA_BOOLEAN, "isNull")
+          (s"""
+              |${ev.code}
+              |$isNull = ${ev.isNull};
+              |$value = ${ev.value};
+            """.stripMargin, isNull, value, i)
         } else {
-          val value = s"value_$i"
-          ctx.addMutableState(ctx.javaType(e.dataType), value,
-            s"$value = ${ctx.defaultValue(e.dataType)};")
-          s"""
-            ${ev.code}
-            $value = ${ev.value};
-          """
+          (s"""
+              |${ev.code}
+              |$value = ${ev.value};
+            """.stripMargin, ev.isNull, value, i)
        }
    }
 
     // Evaluate all the subexpressions.
val evalSubexpr = ctx.subexprFunctions.mkString("\n") - val updates = validExpr.zip(index).map { - case (e, i) => - val ev = ExprCode("", s"isNull_$i", s"value_$i") + val updates = validExpr.zip(projectionCodes).map { + case (e, (_, isNull, value, i)) => + val ev = ExprCode("", isNull, value) ctx.updateColumn("mutableRow", e.dataType, i, ev, e.nullable) } - val allProjections = ctx.splitExpressions(projectionCodes) - val allUpdates = ctx.splitExpressions(updates) + val allProjections = ctx.splitExpressionsWithCurrentInputs(projectionCodes.map(_._1)) + val allUpdates = ctx.splitExpressionsWithCurrentInputs(updates) val codeBody = s""" public java.lang.Object generate(Object[] references) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala index 5d35cce1a91cb..3dcbb518ba42a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala @@ -49,8 +49,6 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] val tmpInput = ctx.freshName("tmpInput") val output = ctx.freshName("safeRow") val values = ctx.freshName("values") - // These expressions could be split into multiple functions - ctx.addMutableState("Object[]", values, s"$values = null;") val rowClass = classOf[GenericInternalRow].getName @@ -66,15 +64,15 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] val allFields = ctx.splitExpressions( expressions = fieldWriters, funcName = "writeFields", - arguments = Seq("InternalRow" -> tmpInput) + arguments = Seq("InternalRow" -> tmpInput, "Object[]" -> values) ) - val code = s""" - final InternalRow $tmpInput = $input; - $values = new Object[${schema.length}]; - $allFields - final InternalRow $output = new $rowClass($values); - $values = null; - """ + val code = + s""" + |final InternalRow $tmpInput = $input; + |final Object[] $values = new Object[${schema.length}]; + |$allFields + |final InternalRow $output = new $rowClass($values); + """.stripMargin ExprCode(code, "false", output) } @@ -159,7 +157,7 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] } """ } - val allExpressions = ctx.splitExpressions(expressionCodes) + val allExpressions = ctx.splitExpressionsWithCurrentInputs(expressionCodes) val codeBody = s""" public java.lang.Object generate(Object[] references) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala index b022457865d50..36ffa8dcdd2b6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala @@ -73,9 +73,8 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro bufferHolder: String, isTopLevel: Boolean = false): String = { val rowWriterClass = classOf[UnsafeRowWriter].getName - val rowWriter = ctx.freshName("rowWriter") - ctx.addMutableState(rowWriterClass, rowWriter, - s"$rowWriter = new $rowWriterClass($bufferHolder, ${inputs.length});") + val rowWriter = 
ctx.addMutableState(rowWriterClass, "rowWriter", + v => s"$v = new $rowWriterClass($bufferHolder, ${inputs.length});") val resetWriter = if (isTopLevel) { // For top level row writer, it always writes to the beginning of the global buffer holder, @@ -186,9 +185,8 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro // Puts `input` in a local variable to avoid to re-evaluate it if it's a statement. val tmpInput = ctx.freshName("tmpInput") val arrayWriterClass = classOf[UnsafeArrayWriter].getName - val arrayWriter = ctx.freshName("arrayWriter") - ctx.addMutableState(arrayWriterClass, arrayWriter, - s"$arrayWriter = new $arrayWriterClass();") + val arrayWriter = ctx.addMutableState(arrayWriterClass, "arrayWriter", + v => s"$v = new $arrayWriterClass();") val numElements = ctx.freshName("numElements") val index = ctx.freshName("index") @@ -318,13 +316,12 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro case _ => true } - val result = ctx.freshName("result") - ctx.addMutableState("UnsafeRow", result, s"$result = new UnsafeRow(${expressions.length});") + val result = ctx.addMutableState("UnsafeRow", "result", + v => s"$v = new UnsafeRow(${expressions.length});") - val holder = ctx.freshName("holder") val holderClass = classOf[BufferHolder].getName - ctx.addMutableState(holderClass, holder, - s"$holder = new $holderClass($result, ${numVarLenFields * 32});") + val holder = ctx.addMutableState(holderClass, "holder", + v => s"$v = new $holderClass($result, ${numVarLenFields * 32});") val resetBufferHolder = if (numVarLenFields == 0) { "" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala index be5f5a73b5d47..febf7b0c96c2a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala @@ -70,7 +70,7 @@ object GenerateUnsafeRowJoiner extends CodeGenerator[(StructType, StructType), U // --------------------- copy bitset from row 1 and row 2 --------------------------- // val copyBitset = Seq.tabulate(outputBitsetWords) { i => - val bits = if (bitset1Remainder > 0) { + val bits = if (bitset1Remainder > 0 && bitset2Words != 0) { if (i < bitset1Words - 1) { s"$getLong(obj1, offset1 + ${i * 8})" } else if (i == bitset1Words - 1) { @@ -152,7 +152,9 @@ object GenerateUnsafeRowJoiner extends CodeGenerator[(StructType, StructType), U } else { // Number of bytes to increase for the offset. Note that since in UnsafeRow we store the // offset in the upper 32 bit of the words, we can just shift the offset to the left by - // 32 and increment that amount in place. + // 32 and increment that amount in place. However, we need to handle the important special + // case of a null field, in which case the offset should be zero and should not have a + // shift added to it. 
      val shift = if (i < schema1.size) {
        s"${(outputBitsetWords - bitset1Words + schema2.size) * 8}L"
@@ -160,14 +162,55 @@ object GenerateUnsafeRowJoiner extends CodeGenerator[(StructType, StructType), U
      } else {
        s"(${(outputBitsetWords - bitset2Words + schema1.size) * 8}L + numBytesVariableRow1)"
      }
      val cursor = offset + outputBitsetWords * 8 + i * 8
-      s"$putLong(buf, $cursor, $getLong(buf, $cursor) + ($shift << 32));\n"
+      // UnsafeRow is a little underspecified, so in what follows we'll treat UnsafeRowWriter's
+      // output as a de-facto specification for the internal layout of data.
+      //
+      // Null-valued fields will always have a data offset of 0 because
+      // UnsafeRowWriter.setNullAt(ordinal) sets the null bit and stores 0 in the field's
+      // position in the fixed-length section of the row. As a result, we must NOT add
+      // `shift` to the offset for null fields.
+      //
+      // We could perform a null-check here by inspecting the null-tracking bitmap, but doing
+      // so could be expensive and will add significant bloat to the generated code. Instead,
+      // we'll rely on the invariant "stored offset == 0 for variable-length data type implies
+      // that the field's value is null."
+      //
+      // To establish that this invariant holds, we'll prove that a non-null field can never
+      // have a stored offset of 0. There are two cases to consider:
+      //
+      //   1. The non-null field's data is of non-zero length: reading this field's value
+      //      must read data from the variable-length section of the row, so the stored offset
+      //      will actually be used in address calculation and must be correct. The offsets
+      //      count bytes from the start of the UnsafeRow so these offsets will always be
+      //      non-zero because the storage of the offsets themselves takes up space at the
+      //      start of the row.
+      //   2. The non-null field's data is of zero length (i.e. its data is empty). In this
+      //      case, we have to worry about the possibility that an arbitrary offset value was
+      //      stored because we never actually read any bytes using this offset and therefore
+      //      would not crash if it was incorrect. The variable-sized data writing paths in
+      //      UnsafeRowWriter unconditionally call setOffsetAndSize(ordinal, numBytes) with
+      //      no special handling for the case where `numBytes == 0`. Internally,
+      //      setOffsetAndSize computes the offset without taking the size into account. Thus
+      //      the stored offset is the same non-zero offset that would be used if the field's
+      //      dataSize was non-zero (and in (1) above we've shown that case behaves as we
+      //      expect).
+      //
+      // Thus it is safe to perform `existingOffset != 0` checks here in place of
+      // more expensive null-bit checks.
+      s"""
+         |existingOffset = $getLong(buf, $cursor);
+         |if (existingOffset != 0) {
+         |  $putLong(buf, $cursor, existingOffset + ($shift << 32));
+         |}
       """.stripMargin
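
Illustration (not part of the patch): a compact model of the invariant argued above. An
offset-and-size word packs the offset into the upper 32 bits; a stored 0 marks a null
field, which must not be shifted. All names are illustrative:

    object OffsetShiftDemo {
      // A variable-length field stores (offset << 32) | size in its fixed-length slot;
      // null fields store 0L. Relocating a row must shift only non-null offsets.
      def relocate(offsetAndSize: Long, shiftBytes: Long): Long =
        if (offsetAndSize == 0L) 0L              // null field: leave untouched
        else offsetAndSize + (shiftBytes << 32)  // bump the offset, keep the size

      def main(args: Array[String]): Unit = {
        val field = (16L << 32) | 8L             // 8 bytes stored at offset 16
        println(relocate(field, 24) >>> 32)      // new offset: 40
        println(relocate(0L, 24))                // null stays 0
      }
    }
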
      }
    }
 
     val updateOffsets = ctx.splitExpressions(
       expressions = updateOffset,
       funcName = "copyBitsetFunc",
-      arguments = ("long", "numBytesVariableRow1") :: Nil)
+      arguments = ("long", "numBytesVariableRow1") :: Nil,
+      makeSplitFunction = (s: String) => "long existingOffset;\n" + s)
 
     // ------------------------ Finally, put everything together  --------------------------- //
     val codeBody = s"""
@@ -200,6 +243,7 @@ object GenerateUnsafeRowJoiner extends CodeGenerator[(StructType, StructType), U
        |    $copyFixedLengthRow2
        |    $copyVariableLengthRow1
        |    $copyVariableLengthRow2
+       |    long existingOffset;
        |    $updateOffsets
        |
        |    out.pointTo(buf, sizeInBytes);
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
index 57a7f2e207738..3dc2ee03a86e3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
@@ -63,7 +63,7 @@ case class CreateArray(children: Seq[Expression]) extends Expression {
     val (preprocess, assigns, postprocess, arrayData) =
       GenArrayData.genCodeToCreateArrayData(ctx, et, evals, false)
     ev.copy(
-      code = preprocess + ctx.splitExpressions(assigns) + postprocess,
+      code = preprocess + assigns + postprocess,
       value = arrayData,
       isNull = "false")
   }
@@ -77,24 +77,22 @@ private [sql] object GenArrayData {
    *
    * @param ctx a [[CodegenContext]]
    * @param elementType data type of underlying array elements
-   * @param elementsCode a set of [[ExprCode]] for each element of an underlying array
+   * @param elementsCode concatenated set of [[ExprCode]] for each element of an underlying array
    * @param isMapKey if true, throw an exception when the element is null
-   * @return (code pre-assignments, assignments to each array elements, code post-assignments,
-   *          arrayData name)
+   * @return (code pre-assignments, concatenated assignments to each array element,
+   *          code post-assignments, arrayData name)
    */
   def genCodeToCreateArrayData(
       ctx: CodegenContext,
       elementType: DataType,
       elementsCode: Seq[ExprCode],
-      isMapKey: Boolean): (String, Seq[String], String, String) = {
-    val arrayName = ctx.freshName("array")
+      isMapKey: Boolean): (String, String, String, String) = {
     val arrayDataName = ctx.freshName("arrayData")
     val numElements = elementsCode.length
 
     if (!ctx.isPrimitiveType(elementType)) {
+      val arrayName = ctx.freshName("arrayObject")
       val genericArrayClass = classOf[GenericArrayData].getName
-      ctx.addMutableState("Object[]", arrayName,
-        s"$arrayName = new Object[$numElements];")
 
       val assignments = elementsCode.zipWithIndex.map { case (eval, i) =>
         val isNullAssignment = if (!isMapKey) {
@@ -110,17 +108,21 @@ private [sql] object GenArrayData {
           }
         """
       }
+      val assignmentString = ctx.splitExpressionsWithCurrentInputs(
+        expressions = assignments,
+        funcName = "apply",
+        extraArguments = ("Object[]", arrayDataName) :: Nil)
 
-      ("",
-        assignments,
+      (s"Object[] $arrayName = new Object[$numElements];",
+        assignmentString,
         s"final ArrayData $arrayDataName = new $genericArrayClass($arrayName);",
         arrayDataName)
     } else {
+      val arrayName = ctx.freshName("array")
      val unsafeArraySizeInBytes = UnsafeArrayData.calculateHeaderPortionInBytes(numElements) + 
ByteArrayMethods.roundNumberOfBytesToNearestWord(elementType.defaultSize * numElements) val baseOffset = Platform.BYTE_ARRAY_OFFSET - ctx.addMutableState("UnsafeArrayData", arrayDataName) val primitiveValueTypeName = ctx.primitiveTypeName(elementType) val assignments = elementsCode.zipWithIndex.map { case (eval, i) => @@ -137,14 +139,18 @@ private [sql] object GenArrayData { } """ } + val assignmentString = ctx.splitExpressionsWithCurrentInputs( + expressions = assignments, + funcName = "apply", + extraArguments = ("UnsafeArrayData", arrayDataName) :: Nil) (s""" byte[] $arrayName = new byte[$unsafeArraySizeInBytes]; - $arrayDataName = new UnsafeArrayData(); + UnsafeArrayData $arrayDataName = new UnsafeArrayData(); Platform.putLong($arrayName, $baseOffset, $numElements); $arrayDataName.pointTo($arrayName, $baseOffset, $unsafeArraySizeInBytes); """, - assignments, + assignmentString, "", arrayDataName) } @@ -216,10 +222,10 @@ case class CreateMap(children: Seq[Expression]) extends Expression { s""" final boolean ${ev.isNull} = false; $preprocessKeyData - ${ctx.splitExpressions(assignKeys)} + $assignKeys $postprocessKeyData $preprocessValueData - ${ctx.splitExpressions(assignValues)} + $assignValues $postprocessValueData final MapData ${ev.value} = new $mapClass($keyArrayData, $valueArrayData); """ @@ -350,22 +356,25 @@ case class CreateNamedStruct(children: Seq[Expression]) extends CreateNamedStruc override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val rowClass = classOf[GenericInternalRow].getName val values = ctx.freshName("values") - ctx.addMutableState("Object[]", values, s"$values = null;") - val valuesCode = ctx.splitExpressions( - valExprs.zipWithIndex.map { case (e, i) => - val eval = e.genCode(ctx) - s""" - ${eval.code} - if (${eval.isNull}) { - $values[$i] = null; - } else { - $values[$i] = ${eval.value}; - }""" - }) + val valCodes = valExprs.zipWithIndex.map { case (e, i) => + val eval = e.genCode(ctx) + s""" + |${eval.code} + |if (${eval.isNull}) { + | $values[$i] = null; + |} else { + | $values[$i] = ${eval.value}; + |} + """.stripMargin + } + val valuesCode = ctx.splitExpressionsWithCurrentInputs( + expressions = valCodes, + funcName = "createNamedStruct", + extraArguments = "Object[]" -> values :: Nil) ev.copy(code = s""" - |$values = new Object[${valExprs.size}]; + |Object[] $values = new Object[${valExprs.size}]; |$valuesCode |final InternalRow ${ev.value} = new $rowClass($values); |$values = null; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala index 43e643178c899..b444c3a7be92a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala @@ -40,7 +40,8 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi override def checkInputDataTypes(): TypeCheckResult = { if (predicate.dataType != BooleanType) { TypeCheckResult.TypeCheckFailure( - s"type of predicate expression in If should be boolean, not ${predicate.dataType}") + "type of predicate expression in If should be boolean, " + + s"not ${predicate.dataType.simpleString}") } else if (!trueValue.dataType.sameType(falseValue.dataType)) { TypeCheckResult.TypeCheckFailure(s"differing types in '$sql' " + s"(${trueValue.dataType.simpleString} and 
${falseValue.dataType.simpleString}").
@@ -180,13 +181,17 @@ case class CaseWhen(
}
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
- // This variable represents whether the first successful condition is met or not.
- // It is initialized to `false` and it is set to `true` when the first condition which
- // evaluates to `true` is met and therefore is not needed to go on anymore on the computation
- // of the following conditions.
- val conditionMet = ctx.freshName("caseWhenConditionMet")
- ctx.addMutableState(ctx.JAVA_BOOLEAN, ev.isNull)
- ctx.addMutableState(ctx.javaType(dataType), ev.value)
+ // This variable holds the state of the result:
+ // -1 means the condition is not met yet and the result is unknown.
+ val NOT_MATCHED = -1
+ // 0 means the condition is met and the result is not null.
+ val HAS_NONNULL = 0
+ // 1 means the condition is met and the result is null.
+ val HAS_NULL = 1
+ // It is initialized to `NOT_MATCHED`; once it is set to `HAS_NULL` or `HAS_NONNULL`,
+ // we stop evaluating the remaining conditions.
+ val resultState = ctx.freshName("caseWhenResultState")
+ ev.value = ctx.addMutableState(ctx.javaType(dataType), ev.value)
// these blocks are meant to be inside a
// do {
@@ -200,9 +205,8 @@ case class CaseWhen(
|${cond.code}
|if (!${cond.isNull} && ${cond.value}) {
| ${res.code}
- | ${ev.isNull} = ${res.isNull};
+ | $resultState = (byte)(${res.isNull} ? $HAS_NULL : $HAS_NONNULL);
| ${ev.value} = ${res.value};
- | $conditionMet = true;
| continue;
|}
""".stripMargin
@@ -212,65 +216,61 @@
val res = elseExpr.genCode(ctx)
s"""
|${res.code}
- |${ev.isNull} = ${res.isNull};
+ |$resultState = (byte)(${res.isNull} ? $HAS_NULL : $HAS_NONNULL);
|${ev.value} = ${res.value};
""".stripMargin
}
val allConditions = cases ++ elseCode
- val code = if (ctx.INPUT_ROW == null || ctx.currentVars != null) {
- allConditions.mkString("\n")
- } else {
- // This generates code like:
- // conditionMet = caseWhen_1(i);
- // if(conditionMet) {
- // continue;
- // }
- // conditionMet = caseWhen_2(i);
- // if(conditionMet) {
- // continue;
- // }
- // ...
- // and the declared methods are:
- // private boolean caseWhen_1234() {
- // boolean conditionMet = false;
- // do {
- // // here the evaluation of the conditions
- // } while (false);
- // return conditionMet;
- // }
- ctx.splitExpressions(allConditions, "caseWhen",
- ("InternalRow", ctx.INPUT_ROW) :: Nil,
- returnType = ctx.JAVA_BOOLEAN,
- makeSplitFunction = {
- func =>
- s"""
- ${ctx.JAVA_BOOLEAN} $conditionMet = false;
- do {
- $func
- } while (false);
- return $conditionMet;
- """
- },
- foldFunctions = { funcCalls =>
- funcCalls.map { funcCall =>
- s"""
- $conditionMet = $funcCall;
- if ($conditionMet) {
- continue;
- }"""
- }.mkString
- })
- }
+ // This generates code like:
+ // caseWhenResultState = caseWhen_1(i);
+ // if(caseWhenResultState != -1) {
+ // continue;
+ // }
+ // caseWhenResultState = caseWhen_2(i);
+ // if(caseWhenResultState != -1) {
+ // continue;
+ // }
+ // ...
+ // and the declared methods are:
+ // private byte caseWhen_1234() {
+ // byte caseWhenResultState = -1;
+ // do {
+ // // here the evaluation of the conditions
+ // } while (false);
+ // return caseWhenResultState;
+ // }
+ val codes = ctx.splitExpressionsWithCurrentInputs(
+ expressions = allConditions,
+ funcName = "caseWhen",
+ returnType = ctx.JAVA_BYTE,
+ makeSplitFunction = func =>
+ s"""
+ |${ctx.JAVA_BYTE} $resultState = $NOT_MATCHED;
+ |do {
+ | $func
+ |} while (false);
+ |return $resultState;
+ """.stripMargin,
+ foldFunctions = _.map { funcCall =>
+ s"""
+ |$resultState = $funcCall;
+ |if ($resultState != $NOT_MATCHED) {
+ | continue;
+ |}
+ """.stripMargin
+ }.mkString)
- ev.copy(code = s"""
- ${ev.isNull} = true;
- ${ev.value} = ${ctx.defaultValue(dataType)};
- ${ctx.JAVA_BOOLEAN} $conditionMet = false;
- do {
- $code
- } while (false);""")
+ ev.copy(code =
+ s"""
+ |${ctx.JAVA_BYTE} $resultState = $NOT_MATCHED;
+ |do {
+ | $codes
+ |} while (false);
+ |// TRUE if any condition is met and the result is null, or no condition is met.
+ |final boolean ${ev.isNull} = ($resultState != $HAS_NONNULL);
+ """.stripMargin)
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index eaf8788888211..424871f2047e9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -23,6 +23,8 @@ import java.util.{Calendar, TimeZone}
import scala.util.control.NonFatal
+import org.apache.commons.lang3.StringEscapeUtils
+
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodegenFallback, ExprCode}
import org.apache.spark.sql.catalyst.util.DateTimeUtils
@@ -226,7 +228,7 @@ case class Hour(child: Expression, timeZoneId: Option[String] = None)
}
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
- val tz = ctx.addReferenceMinorObj(timeZone)
+ val tz = ctx.addReferenceObj("timeZone", timeZone)
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
defineCodeGen(ctx, ev, c => s"$dtu.getHours($c, $tz)")
}
@@ -257,7 +259,7 @@ case class Minute(child: Expression, timeZoneId: Option[String] = None)
}
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
- val tz = ctx.addReferenceMinorObj(timeZone)
+ val tz = ctx.addReferenceObj("timeZone", timeZone)
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
defineCodeGen(ctx, ev, c => s"$dtu.getMinutes($c, $tz)")
}
@@ -288,7 +290,7 @@ case class Second(child: Expression, timeZoneId: Option[String] = None)
}
override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
- val tz = ctx.addReferenceMinorObj(timeZone)
+ val tz = ctx.addReferenceObj("timeZone", timeZone)
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
defineCodeGen(ctx, ev, c => s"$dtu.getSeconds($c, $tz)")
}
@@ -442,9 +444,10 @@ case class DayOfWeek(child: Expression) extends UnaryExpression with ImplicitCas
override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
nullSafeCodeGen(ctx, ev, time => {
val cal = classOf[Calendar].getName
- val c = ctx.freshName("cal")
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
- ctx.addMutableState(cal, c, s"""$c = $cal.getInstance($dtu.getTimeZone("UTC"));""")
+ val c =
"calDayOfWeek" + ctx.addImmutableStateIfNotExists(cal, c, + v => s"""$v = $cal.getInstance($dtu.getTimeZone("UTC"));""") s""" $c.setTimeInMillis($time * 1000L * 3600L * 24L); ${ev.value} = $c.get($cal.DAY_OF_WEEK); @@ -484,18 +487,18 @@ case class WeekOfYear(child: Expression) extends UnaryExpression with ImplicitCa override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { nullSafeCodeGen(ctx, ev, time => { val cal = classOf[Calendar].getName - val c = ctx.freshName("cal") + val c = "calWeekOfYear" val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - ctx.addMutableState(cal, c, + ctx.addImmutableStateIfNotExists(cal, c, v => s""" - $c = $cal.getInstance($dtu.getTimeZone("UTC")); - $c.setFirstDayOfWeek($cal.MONDAY); - $c.setMinimalDaysInFirstWeek(4); - """) + |$v = $cal.getInstance($dtu.getTimeZone("UTC")); + |$v.setFirstDayOfWeek($cal.MONDAY); + |$v.setMinimalDaysInFirstWeek(4); + """.stripMargin) s""" - $c.setTimeInMillis($time * 1000L * 3600L * 24L); - ${ev.value} = $c.get($cal.WEEK_OF_YEAR); - """ + |$c.setTimeInMillis($time * 1000L * 3600L * 24L); + |${ev.value} = $c.get($cal.WEEK_OF_YEAR); + """.stripMargin }) } } @@ -529,7 +532,7 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - val tz = ctx.addReferenceMinorObj(timeZone) + val tz = ctx.addReferenceObj("timeZone", timeZone) defineCodeGen(ctx, ev, (timestamp, format) => { s"""UTF8String.fromString($dtu.newDateFormat($format.toString(), $tz) .format(new java.util.Date($timestamp / 1000)))""" @@ -691,7 +694,7 @@ abstract class UnixTime }""") } case StringType => - val tz = ctx.addReferenceMinorObj(timeZone) + val tz = ctx.addReferenceObj("timeZone", timeZone) val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") nullSafeCodeGen(ctx, ev, (string, format) => { s""" @@ -715,7 +718,7 @@ abstract class UnixTime ${ev.value} = ${eval1.value} / 1000000L; }""") case DateType => - val tz = ctx.addReferenceMinorObj(timeZone) + val tz = ctx.addReferenceObj("timeZone", timeZone) val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") val eval1 = left.genCode(ctx) ev.copy(code = s""" @@ -827,7 +830,7 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[ }""") } } else { - val tz = ctx.addReferenceMinorObj(timeZone) + val tz = ctx.addReferenceObj("timeZone", timeZone) val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") nullSafeCodeGen(ctx, ev, (seconds, f) => { s""" @@ -969,7 +972,7 @@ case class TimeAdd(start: Expression, interval: Expression, timeZoneId: Option[S } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val tz = ctx.addReferenceMinorObj(timeZone) + val tz = ctx.addReferenceObj("timeZone", timeZone) val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") defineCodeGen(ctx, ev, (sd, i) => { s"""$dtu.timestampAddInterval($sd, $i.months, $i.microseconds, $tz)""" @@ -1007,19 +1010,21 @@ case class FromUTCTimestamp(left: Expression, right: Expression) override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") if (right.foldable) { - val tz = right.eval() + val tz = right.eval().asInstanceOf[UTF8String] if (tz == null) { ev.copy(code = s""" |boolean ${ev.isNull} = true; |long ${ev.value} = 0; """.stripMargin) } else { - val tzTerm = ctx.freshName("tz") - val utcTerm = ctx.freshName("utc") val tzClass = 
classOf[TimeZone].getName val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - ctx.addMutableState(tzClass, tzTerm, s"""$tzTerm = $dtu.getTimeZone("$tz");""") - ctx.addMutableState(tzClass, utcTerm, s"""$utcTerm = $dtu.getTimeZone("UTC");""") + val escapedTz = StringEscapeUtils.escapeJava(tz.toString) + val tzTerm = ctx.addMutableState(tzClass, "tz", + v => s"""$v = $dtu.getTimeZone("$escapedTz");""") + val utcTerm = "tzUTC" + ctx.addImmutableStateIfNotExists(tzClass, utcTerm, + v => s"""$v = $dtu.getTimeZone("UTC");""") val eval = left.genCode(ctx) ev.copy(code = s""" |${eval.code} @@ -1065,7 +1070,7 @@ case class TimeSub(start: Expression, interval: Expression, timeZoneId: Option[S } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val tz = ctx.addReferenceMinorObj(timeZone) + val tz = ctx.addReferenceObj("timeZone", timeZone) val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") defineCodeGen(ctx, ev, (sd, i) => { s"""$dtu.timestampAddInterval($sd, 0 - $i.months, 0 - $i.microseconds, $tz)""" @@ -1143,7 +1148,7 @@ case class MonthsBetween(date1: Expression, date2: Expression, timeZoneId: Optio } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val tz = ctx.addReferenceMinorObj(timeZone) + val tz = ctx.addReferenceObj("timeZone", timeZone) val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") defineCodeGen(ctx, ev, (l, r) => { s"""$dtu.monthsBetween($l, $r, $tz)""" @@ -1183,19 +1188,21 @@ case class ToUTCTimestamp(left: Expression, right: Expression) override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") if (right.foldable) { - val tz = right.eval() + val tz = right.eval().asInstanceOf[UTF8String] if (tz == null) { ev.copy(code = s""" |boolean ${ev.isNull} = true; |long ${ev.value} = 0; """.stripMargin) } else { - val tzTerm = ctx.freshName("tz") - val utcTerm = ctx.freshName("utc") val tzClass = classOf[TimeZone].getName val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - ctx.addMutableState(tzClass, tzTerm, s"""$tzTerm = $dtu.getTimeZone("$tz");""") - ctx.addMutableState(tzClass, utcTerm, s"""$utcTerm = $dtu.getTimeZone("UTC");""") + val escapedTz = StringEscapeUtils.escapeJava(tz.toString) + val tzTerm = ctx.addMutableState(tzClass, "tz", + v => s"""$v = $dtu.getTimeZone("$escapedTz");""") + val utcTerm = "tzUTC" + ctx.addImmutableStateIfNotExists(tzClass, utcTerm, + v => s"""$v = $dtu.getTimeZone("UTC");""") val eval = left.genCode(ctx) ev.copy(code = s""" |${eval.code} @@ -1295,80 +1302,79 @@ case class ParseToTimestamp(left: Expression, format: Option[Expression], child: override def dataType: DataType = TimestampType } -/** - * Returns date truncated to the unit specified by the format. 
- */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`.", - examples = """ - Examples: - > SELECT _FUNC_('2009-02-12', 'MM'); - 2009-02-01 - > SELECT _FUNC_('2015-10-27', 'YEAR'); - 2015-01-01 - """, - since = "1.5.0") -// scalastyle:on line.size.limit -case class TruncDate(date: Expression, format: Expression) - extends BinaryExpression with ImplicitCastInputTypes { - override def left: Expression = date - override def right: Expression = format - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) - override def dataType: DataType = DateType +trait TruncInstant extends BinaryExpression with ImplicitCastInputTypes { + val instant: Expression + val format: Expression override def nullable: Boolean = true - override def prettyName: String = "trunc" private lazy val truncLevel: Int = DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) - override def eval(input: InternalRow): Any = { + /** + * @param input internalRow (time) + * @param maxLevel Maximum level that can be used for truncation (e.g MONTH for Date input) + * @param truncFunc function: (time, level) => time + */ + protected def evalHelper(input: InternalRow, maxLevel: Int)( + truncFunc: (Any, Int) => Any): Any = { val level = if (format.foldable) { truncLevel } else { DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) } - if (level == -1) { - // unknown format + if (level == DateTimeUtils.TRUNC_INVALID || level > maxLevel) { + // unknown format or too large level null } else { - val d = date.eval(input) - if (d == null) { + val t = instant.eval(input) + if (t == null) { null } else { - DateTimeUtils.truncDate(d.asInstanceOf[Int], level) + truncFunc(t, level) } } } - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + protected def codeGenHelper( + ctx: CodegenContext, + ev: ExprCode, + maxLevel: Int, + orderReversed: Boolean = false)( + truncFunc: (String, String) => String) + : ExprCode = { val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") if (format.foldable) { - if (truncLevel == -1) { + if (truncLevel == DateTimeUtils.TRUNC_INVALID || truncLevel > maxLevel) { ev.copy(code = s""" boolean ${ev.isNull} = true; ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};""") } else { - val d = date.genCode(ctx) + val t = instant.genCode(ctx) + val truncFuncStr = truncFunc(t.value, truncLevel.toString) ev.copy(code = s""" - ${d.code} - boolean ${ev.isNull} = ${d.isNull}; + ${t.code} + boolean ${ev.isNull} = ${t.isNull}; ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; if (!${ev.isNull}) { - ${ev.value} = $dtu.truncDate(${d.value}, $truncLevel); + ${ev.value} = $dtu.$truncFuncStr; }""") } } else { - nullSafeCodeGen(ctx, ev, (dateVal, fmt) => { + nullSafeCodeGen(ctx, ev, (left, right) => { val form = ctx.freshName("form") + val (dateVal, fmt) = if (orderReversed) { + (right, left) + } else { + (left, right) + } + val truncFuncStr = truncFunc(dateVal, form) s""" int $form = $dtu.parseTruncLevel($fmt); - if ($form == -1) { + if ($form == -1 || $form > $maxLevel) { ${ev.isNull} = true; } else { - ${ev.value} = $dtu.truncDate($dateVal, $form); + ${ev.value} = $dtu.$truncFuncStr } """ }) @@ -1376,6 +1382,101 @@ case class TruncDate(date: Expression, format: Expression) } } +/** + * Returns date truncated to the unit specified by the format. 
+ */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ + _FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`. + `fmt` should be one of ["year", "yyyy", "yy", "mon", "month", "mm"] + """, + examples = """ + Examples: + > SELECT _FUNC_('2009-02-12', 'MM'); + 2009-02-01 + > SELECT _FUNC_('2015-10-27', 'YEAR'); + 2015-01-01 + """, + since = "1.5.0") +// scalastyle:on line.size.limit +case class TruncDate(date: Expression, format: Expression) + extends TruncInstant { + override def left: Expression = date + override def right: Expression = format + + override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) + override def dataType: DataType = DateType + override def prettyName: String = "trunc" + override val instant = date + + override def eval(input: InternalRow): Any = { + evalHelper(input, maxLevel = DateTimeUtils.TRUNC_TO_MONTH) { (d: Any, level: Int) => + DateTimeUtils.truncDate(d.asInstanceOf[Int], level) + } + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + codeGenHelper(ctx, ev, maxLevel = DateTimeUtils.TRUNC_TO_MONTH) { (date: String, fmt: String) => + s"truncDate($date, $fmt);" + } + } +} + +/** + * Returns timestamp truncated to the unit specified by the format. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ + _FUNC_(fmt, ts) - Returns timestamp `ts` truncated to the unit specified by the format model `fmt`. + `fmt` should be one of ["YEAR", "YYYY", "YY", "MON", "MONTH", "MM", "DAY", "DD", "HOUR", "MINUTE", "SECOND", "WEEK", "QUARTER"] + """, + examples = """ + Examples: + > SELECT _FUNC_('2015-03-05T09:32:05.359', 'YEAR'); + 2015-01-01T00:00:00 + > SELECT _FUNC_('2015-03-05T09:32:05.359', 'MM'); + 2015-03-01T00:00:00 + > SELECT _FUNC_('2015-03-05T09:32:05.359', 'DD'); + 2015-03-05T00:00:00 + > SELECT _FUNC_('2015-03-05T09:32:05.359', 'HOUR'); + 2015-03-05T09:00:00 + """, + since = "2.3.0") +// scalastyle:on line.size.limit +case class TruncTimestamp( + format: Expression, + timestamp: Expression, + timeZoneId: Option[String] = None) + extends TruncInstant with TimeZoneAwareExpression { + override def left: Expression = format + override def right: Expression = timestamp + + override def inputTypes: Seq[AbstractDataType] = Seq(StringType, TimestampType) + override def dataType: TimestampType = TimestampType + override def prettyName: String = "date_trunc" + override val instant = timestamp + override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = + copy(timeZoneId = Option(timeZoneId)) + + def this(format: Expression, timestamp: Expression) = this(format, timestamp, None) + + override def eval(input: InternalRow): Any = { + evalHelper(input, maxLevel = DateTimeUtils.TRUNC_TO_SECOND) { (t: Any, level: Int) => + DateTimeUtils.truncTimestamp(t.asInstanceOf[Long], level, timeZone) + } + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val tz = ctx.addReferenceObj("timeZone", timeZone) + codeGenHelper(ctx, ev, maxLevel = DateTimeUtils.TRUNC_TO_SECOND, true) { + (date: String, fmt: String) => + s"truncTimestamp($date, $fmt, $tz);" + } + } +} + /** * Returns the number of days from startDate to endDate. 
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala index 752dea23e1f7a..db1579ba28671 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala @@ -70,10 +70,12 @@ case class MakeDecimal(child: Expression, precision: Int, scale: Int) extends Un case class PromotePrecision(child: Expression) extends UnaryExpression { override def dataType: DataType = child.dataType override def eval(input: InternalRow): Any = child.eval(input) + /** Just a simple pass-through for code generation. */ override def genCode(ctx: CodegenContext): ExprCode = child.genCode(ctx) override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = ev.copy("") override def prettyName: String = "promote_precision" override def sql: String = child.sql + override lazy val canonicalized: Expression = child.canonicalized } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala index f1aa130669266..4f4d49166e88c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala @@ -155,8 +155,8 @@ case class Stack(children: Seq[Expression]) extends Generator { val j = (i - 1) % numFields if (children(i).dataType != elementSchema.fields(j).dataType) { return TypeCheckResult.TypeCheckFailure( - s"Argument ${j + 1} (${elementSchema.fields(j).dataType}) != " + - s"Argument $i (${children(i).dataType})") + s"Argument ${j + 1} (${elementSchema.fields(j).dataType.simpleString}) != " + + s"Argument $i (${children(i).dataType.simpleString})") } } TypeCheckResult.TypeCheckSuccess @@ -199,11 +199,11 @@ case class Stack(children: Seq[Expression]) extends Generator { override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { // Rows - we write these into an array. - val rowData = ctx.freshName("rows") - ctx.addMutableState("InternalRow[]", rowData, s"$rowData = new InternalRow[$numRows];") + val rowData = ctx.addMutableState("InternalRow[]", "rows", + v => s"$v = new InternalRow[$numRows];") val values = children.tail val dataTypes = values.take(numFields).map(_.dataType) - val code = ctx.splitExpressions(Seq.tabulate(numRows) { row => + val code = ctx.splitExpressionsWithCurrentInputs(Seq.tabulate(numRows) { row => val fields = Seq.tabulate(numFields) { col => val index = row * numFields + col if (index < values.length) values(index) else Literal(null, dataTypes(col)) @@ -214,11 +214,11 @@ case class Stack(children: Seq[Expression]) extends Generator { // Create the collection. 
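// The hunks above swap ctx.splitExpressions for the split-and-fold pattern used
// throughout this patch. A minimal self-contained sketch of that mechanism, with
// hypothetical names (the real work happens inside CodegenContext):
def splitAndFold(
    snippets: Seq[String],
    funcName: String,
    makeSplitFunction: String => String,
    foldFunctions: Seq[String] => String): (Seq[String], String) = {
  // Each snippet becomes the body of a private helper so no single method exceeds 64KB.
  val helpers = snippets.zipWithIndex.map { case (body, i) =>
    s"private int ${funcName}_$i(InternalRow i, int acc) { ${makeSplitFunction(body)} }"
  }
  // foldFunctions stitches the helper calls back together, threading the accumulator.
  val calls = snippets.indices.map(i => s"${funcName}_$i(i, acc)")
  (helpers, foldFunctions(calls))
}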
val wrapperClass = classOf[mutable.WrappedArray[_]].getName - ctx.addMutableState( - s"$wrapperClass", - ev.value, - s"${ev.value} = $wrapperClass$$.MODULE$$.make($rowData);") - ev.copy(code = code, isNull = "false") + ev.copy(code = + s""" + |$code + |$wrapperClass ${ev.value} = $wrapperClass$$.MODULE$$.make($rowData); + """.stripMargin, isNull = "false") } } @@ -249,7 +249,8 @@ abstract class ExplodeBase extends UnaryExpression with CollectionGenerator with TypeCheckResult.TypeCheckSuccess case _ => TypeCheckResult.TypeCheckFailure( - s"input to function explode should be array or map type, not ${child.dataType}") + "input to function explode should be array or map type, " + + s"not ${child.dataType.simpleString}") } // hive-compatible default alias for explode function ("col" for array, "key", "value" for map) @@ -378,7 +379,8 @@ case class Inline(child: Expression) extends UnaryExpression with CollectionGene TypeCheckResult.TypeCheckSuccess case _ => TypeCheckResult.TypeCheckFailure( - s"input to function $prettyName should be array of struct type, not ${child.dataType}") + s"input to function $prettyName should be array of struct type, " + + s"not ${child.dataType.simpleString}") } override def elementSchema: StructType = child.dataType match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala index c3289b8299933..055ebf6c0da54 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala @@ -270,17 +270,32 @@ abstract class HashExpression[E] extends Expression { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { ev.isNull = "false" - val childrenHash = ctx.splitExpressions(children.map { child => + + val childrenHash = children.map { child => val childGen = child.genCode(ctx) childGen.code + ctx.nullSafeExec(child.nullable, childGen.isNull) { computeHash(childGen.value, child.dataType, ev.value, ctx) } - }) + } - ctx.addMutableState(ctx.javaType(dataType), ev.value) - ev.copy(code = s""" - ${ev.value} = $seed; - $childrenHash""") + val hashResultType = ctx.javaType(dataType) + val codes = ctx.splitExpressionsWithCurrentInputs( + expressions = childrenHash, + funcName = "computeHash", + extraArguments = Seq(hashResultType -> ev.value), + returnType = hashResultType, + makeSplitFunction = body => + s""" + |$body + |return ${ev.value}; + """.stripMargin, + foldFunctions = _.map(funcCall => s"${ev.value} = $funcCall;").mkString("\n")) + + ev.copy(code = + s""" + |$hashResultType ${ev.value} = $seed; + |$codes + """.stripMargin) } protected def nullSafeElementHash( @@ -389,13 +404,21 @@ abstract class HashExpression[E] extends Expression { input: String, result: String, fields: Array[StructField]): String = { - val hashes = fields.zipWithIndex.map { case (field, index) => + val fieldsHash = fields.zipWithIndex.map { case (field, index) => nullSafeElementHash(input, index.toString, field.nullable, field.dataType, result, ctx) } + val hashResultType = ctx.javaType(dataType) ctx.splitExpressions( - expressions = hashes, - funcName = "getHash", - arguments = Seq("InternalRow" -> input)) + expressions = fieldsHash, + funcName = "computeHashForStruct", + arguments = Seq("InternalRow" -> input, hashResultType -> result), + returnType = hashResultType, + makeSplitFunction = body => + s""" + |$body + |return $result; + """.stripMargin, 
+ foldFunctions = _.map(funcCall => s"$result = $funcCall;").mkString("\n")) } @tailrec @@ -610,25 +633,41 @@ case class HiveHash(children: Seq[Expression]) extends HashExpression[Int] { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { ev.isNull = "false" + val childHash = ctx.freshName("childHash") - val childrenHash = ctx.splitExpressions(children.map { child => + val childrenHash = children.map { child => val childGen = child.genCode(ctx) val codeToComputeHash = ctx.nullSafeExec(child.nullable, childGen.isNull) { computeHash(childGen.value, child.dataType, childHash, ctx) } s""" |${childGen.code} + |$childHash = 0; |$codeToComputeHash |${ev.value} = (31 * ${ev.value}) + $childHash; - |$childHash = 0; """.stripMargin - }) + } - ctx.addMutableState(ctx.javaType(dataType), ev.value) - ctx.addMutableState(ctx.JAVA_INT, childHash, s"$childHash = 0;") - ev.copy(code = s""" - ${ev.value} = $seed; - $childrenHash""") + val codes = ctx.splitExpressionsWithCurrentInputs( + expressions = childrenHash, + funcName = "computeHash", + extraArguments = Seq(ctx.JAVA_INT -> ev.value), + returnType = ctx.JAVA_INT, + makeSplitFunction = body => + s""" + |${ctx.JAVA_INT} $childHash = 0; + |$body + |return ${ev.value}; + """.stripMargin, + foldFunctions = _.map(funcCall => s"${ev.value} = $funcCall;").mkString("\n")) + + + ev.copy(code = + s""" + |${ctx.JAVA_INT} ${ev.value} = $seed; + |${ctx.JAVA_INT} $childHash = 0; + |$codes + """.stripMargin) } override def eval(input: InternalRow = null): Int = { @@ -730,23 +769,29 @@ case class HiveHash(children: Seq[Expression]) extends HashExpression[Int] { input: String, result: String, fields: Array[StructField]): String = { - val localResult = ctx.freshName("localResult") val childResult = ctx.freshName("childResult") - fields.zipWithIndex.map { case (field, index) => + val fieldsHash = fields.zipWithIndex.map { case (field, index) => + val computeFieldHash = nullSafeElementHash( + input, index.toString, field.nullable, field.dataType, childResult, ctx) s""" - $childResult = 0; - ${nullSafeElementHash(input, index.toString, field.nullable, field.dataType, - childResult, ctx)} - $localResult = (31 * $localResult) + $childResult; - """ - }.mkString( - s""" - int $localResult = 0; - int $childResult = 0; - """, - "", - s"$result = (31 * $result) + $localResult;" - ) + |$childResult = 0; + |$computeFieldHash + |$result = (31 * $result) + $childResult; + """.stripMargin + } + + s"${ctx.JAVA_INT} $childResult = 0;\n" + ctx.splitExpressions( + expressions = fieldsHash, + funcName = "computeHashForStruct", + arguments = Seq("InternalRow" -> input, ctx.JAVA_INT -> result), + returnType = ctx.JAVA_INT, + makeSplitFunction = body => + s""" + |${ctx.JAVA_INT} $childResult = 0; + |$body + |return $result; + """.stripMargin, + foldFunctions = _.map(funcCall => s"$result = $funcCall;").mkString("\n")) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index eaeaf08c37b4e..383203a209833 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -290,7 +290,7 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression { case FloatType => val v = value.asInstanceOf[Float] if (v.isNaN || v.isInfinite) { - val boxedValue = ctx.addReferenceMinorObj(v) + val boxedValue = 
ctx.addReferenceObj("boxedValue", v) val code = s"final $javaType ${ev.value} = ($javaType) $boxedValue;" ev.copy(code = code) } else { @@ -299,7 +299,7 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression { case DoubleType => val v = value.asInstanceOf[Double] if (v.isNaN || v.isInfinite) { - val boxedValue = ctx.addReferenceMinorObj(v) + val boxedValue = ctx.addReferenceObj("boxedValue", v) val code = s"final $javaType ${ev.value} = ($javaType) $boxedValue;" ev.copy(code = code) } else { @@ -309,8 +309,9 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression { ev.copy(code = "", value = s"($javaType)$value") case TimestampType | LongType => ev.copy(code = "", value = s"${value}L") - case other => - ev.copy(code = "", value = ctx.addReferenceMinorObj(value, ctx.javaType(dataType))) + case _ => + ev.copy(code = "", value = ctx.addReferenceObj("literal", value, + ctx.javaType(dataType))) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index b86e271fe2958..4b9006ab5b423 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -81,7 +81,7 @@ case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCa // Use unnamed reference that doesn't create a local field here to reduce the number of fields // because errMsgField is used only when the value is null or false. - val errMsgField = ctx.addReferenceMinorObj(errMsg) + val errMsgField = ctx.addReferenceObj("errMsg", errMsg) ExprCode(code = s"""${eval.code} |if (${eval.isNull} || !${eval.value}) { | throw new RuntimeException($errMsgField); diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala index 173e171910b69..470d5da041ea5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala @@ -72,26 +72,52 @@ case class Coalesce(children: Seq[Expression]) extends Expression { } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - ctx.addMutableState(ctx.JAVA_BOOLEAN, ev.isNull) - ctx.addMutableState(ctx.javaType(dataType), ev.value) + ev.isNull = ctx.addMutableState(ctx.JAVA_BOOLEAN, ev.isNull) + // all the evals are meant to be in a do { ... 
} while (false); loop val evals = children.map { e => val eval = e.genCode(ctx) s""" - if (${ev.isNull}) { - ${eval.code} - if (!${eval.isNull}) { - ${ev.isNull} = false; - ${ev.value} = ${eval.value}; - } - } - """ + |${eval.code} + |if (!${eval.isNull}) { + | ${ev.isNull} = false; + | ${ev.value} = ${eval.value}; + | continue; + |} + """.stripMargin } - ev.copy(code = s""" - ${ev.isNull} = true; - ${ev.value} = ${ctx.defaultValue(dataType)}; - ${ctx.splitExpressions(evals)}""") + val resultType = ctx.javaType(dataType) + val codes = ctx.splitExpressionsWithCurrentInputs( + expressions = evals, + funcName = "coalesce", + returnType = resultType, + makeSplitFunction = func => + s""" + |$resultType ${ev.value} = ${ctx.defaultValue(dataType)}; + |do { + | $func + |} while (false); + |return ${ev.value}; + """.stripMargin, + foldFunctions = _.map { funcCall => + s""" + |${ev.value} = $funcCall; + |if (!${ev.isNull}) { + | continue; + |} + """.stripMargin + }.mkString) + + + ev.copy(code = + s""" + |${ev.isNull} = true; + |$resultType ${ev.value} = ${ctx.defaultValue(dataType)}; + |do { + | $codes + |} while (false); + """.stripMargin) } } @@ -358,53 +384,63 @@ case class AtLeastNNonNulls(n: Int, children: Seq[Expression]) extends Predicate override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val nonnull = ctx.freshName("nonnull") + // all evals are meant to be inside a do { ... } while (false); loop val evals = children.map { e => val eval = e.genCode(ctx) e.dataType match { case DoubleType | FloatType => s""" - if ($nonnull < $n) { - ${eval.code} - if (!${eval.isNull} && !Double.isNaN(${eval.value})) { - $nonnull += 1; - } - } - """ + |if ($nonnull < $n) { + | ${eval.code} + | if (!${eval.isNull} && !Double.isNaN(${eval.value})) { + | $nonnull += 1; + | } + |} else { + | continue; + |} + """.stripMargin case _ => s""" - if ($nonnull < $n) { - ${eval.code} - if (!${eval.isNull}) { - $nonnull += 1; - } - } - """ + |if ($nonnull < $n) { + | ${eval.code} + | if (!${eval.isNull}) { + | $nonnull += 1; + | } + |} else { + | continue; + |} + """.stripMargin } } - val code = if (ctx.INPUT_ROW == null || ctx.currentVars != null) { - evals.mkString("\n") - } else { - ctx.splitExpressions( - expressions = evals, - funcName = "atLeastNNonNulls", - arguments = ("InternalRow", ctx.INPUT_ROW) :: ("int", nonnull) :: Nil, - returnType = "int", - makeSplitFunction = { body => - s""" - $body - return $nonnull; - """ - }, - foldFunctions = { funcCalls => - funcCalls.map(funcCall => s"$nonnull = $funcCall;").mkString("\n") - } - ) - } - - ev.copy(code = s""" - int $nonnull = 0; - $code - boolean ${ev.value} = $nonnull >= $n;""", isNull = "false") + val codes = ctx.splitExpressionsWithCurrentInputs( + expressions = evals, + funcName = "atLeastNNonNulls", + extraArguments = (ctx.JAVA_INT, nonnull) :: Nil, + returnType = ctx.JAVA_INT, + makeSplitFunction = body => + s""" + |do { + | $body + |} while (false); + |return $nonnull; + """.stripMargin, + foldFunctions = _.map { funcCall => + s""" + |$nonnull = $funcCall; + |if ($nonnull >= $n) { + | continue; + |} + """.stripMargin + }.mkString) + + ev.copy(code = + s""" + |${ctx.JAVA_INT} $nonnull = 0; + |do { + | $codes + |} while (false); + |${ctx.JAVA_BOOLEAN} ${ev.value} = $nonnull >= $n; + """.stripMargin, isNull = "false") } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 
e2bc79d98b33d..64da9bb9cdec1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
@@ -51,7 +51,7 @@ trait InvokeLike extends Expression with NonSQLExpression {
*
* - generate codes for argument.
* - use ctx.splitExpressions() to not exceed 64kb JVM limit while preparing arguments.
- * - avoid some of nullabilty checking which are not needed because the expression is not
+ * - avoid nullability checks that are not needed because the expression is not
* nullable.
* - when needNullCheck == true, short circuit if we found one of arguments is null because
* preparing rest of arguments can be skipped in the case.
@@ -62,15 +62,13 @@ trait InvokeLike extends Expression with NonSQLExpression {
def prepareArguments(ctx: CodegenContext): (String, String, String) = {
val resultIsNull = if (needNullCheck) {
- val resultIsNull = ctx.freshName("resultIsNull")
- ctx.addMutableState(ctx.JAVA_BOOLEAN, resultIsNull)
+ val resultIsNull = ctx.addMutableState(ctx.JAVA_BOOLEAN, "resultIsNull")
resultIsNull
} else {
"false"
}
val argValues = arguments.map { e =>
- val argValue = ctx.freshName("argValue")
- ctx.addMutableState(ctx.javaType(e.dataType), argValue)
+ val argValue = ctx.addMutableState(ctx.javaType(e.dataType), "argValue")
argValue
}
@@ -101,7 +99,7 @@ trait InvokeLike extends Expression with NonSQLExpression {
"""
}
}
- val argCode = ctx.splitExpressions(argCodes)
+ val argCode = ctx.splitExpressionsWithCurrentInputs(argCodes)
(argCode, argValues.mkString(", "), resultIsNull)
}
@@ -195,7 +193,8 @@ case class StaticInvoke(
* @param targetObject An expression that will return the object to call the method on.
* @param functionName The name of the method to call.
* @param dataType The expected return type of the function.
- * @param arguments An optional list of expressions, whos evaluation will be passed to the function.
+ * @param arguments An optional list of expressions, whose evaluation will be passed to the
+ * function.
* @param propagateNull When true, and any of the arguments is null, null will be returned instead
* of calling the function.
* @param returnNullable When false, indicating the invoked method will always return @@ -548,7 +547,7 @@ case class MapObjects private( override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val elementJavaType = ctx.javaType(loopVarDataType) - ctx.addMutableState(elementJavaType, loopValue) + ctx.addMutableState(elementJavaType, loopValue, forceInline = true, useFreshName = false) val genInputData = inputData.genCode(ctx) val genFunction = lambdaFunction.genCode(ctx) val dataLength = ctx.freshName("dataLength") @@ -644,7 +643,7 @@ case class MapObjects private( } val loopNullCheck = if (loopIsNull != "false") { - ctx.addMutableState(ctx.JAVA_BOOLEAN, loopIsNull) + ctx.addMutableState(ctx.JAVA_BOOLEAN, loopIsNull, forceInline = true, useFreshName = false) inputDataType match { case _: ArrayType => s"$loopIsNull = ${genInputData.value}.isNullAt($loopIndex);" case _ => s"$loopIsNull = $loopValue == null;" @@ -808,10 +807,11 @@ case class CatalystToExternalMap private( val mapType = inputDataType(inputData.dataType).asInstanceOf[MapType] val keyElementJavaType = ctx.javaType(mapType.keyType) - ctx.addMutableState(keyElementJavaType, keyLoopValue) + ctx.addMutableState(keyElementJavaType, keyLoopValue, forceInline = true, useFreshName = false) val genKeyFunction = keyLambdaFunction.genCode(ctx) val valueElementJavaType = ctx.javaType(mapType.valueType) - ctx.addMutableState(valueElementJavaType, valueLoopValue) + ctx.addMutableState(valueElementJavaType, valueLoopValue, forceInline = true, + useFreshName = false) val genValueFunction = valueLambdaFunction.genCode(ctx) val genInputData = inputData.genCode(ctx) val dataLength = ctx.freshName("dataLength") @@ -844,7 +844,8 @@ case class CatalystToExternalMap private( val genValueFunctionValue = genFunctionValue(valueLambdaFunction, genValueFunction) val valueLoopNullCheck = if (valueLoopIsNull != "false") { - ctx.addMutableState(ctx.JAVA_BOOLEAN, valueLoopIsNull) + ctx.addMutableState(ctx.JAVA_BOOLEAN, valueLoopIsNull, forceInline = true, + useFreshName = false) s"$valueLoopIsNull = $valueArray.isNullAt($loopIndex);" } else { "" @@ -994,8 +995,8 @@ case class ExternalMapToCatalyst private( val keyElementJavaType = ctx.javaType(keyType) val valueElementJavaType = ctx.javaType(valueType) - ctx.addMutableState(keyElementJavaType, key) - ctx.addMutableState(valueElementJavaType, value) + ctx.addMutableState(keyElementJavaType, key, forceInline = true, useFreshName = false) + ctx.addMutableState(valueElementJavaType, value, forceInline = true, useFreshName = false) val (defineEntries, defineKeyValue) = child.dataType match { case ObjectType(cls) if classOf[java.util.Map[_, _]].isAssignableFrom(cls) => @@ -1031,14 +1032,14 @@ case class ExternalMapToCatalyst private( } val keyNullCheck = if (keyIsNull != "false") { - ctx.addMutableState(ctx.JAVA_BOOLEAN, keyIsNull) + ctx.addMutableState(ctx.JAVA_BOOLEAN, keyIsNull, forceInline = true, useFreshName = false) s"$keyIsNull = $key == null;" } else { "" } val valueNullCheck = if (valueIsNull != "false") { - ctx.addMutableState(ctx.JAVA_BOOLEAN, valueIsNull) + ctx.addMutableState(ctx.JAVA_BOOLEAN, valueIsNull, forceInline = true, useFreshName = false) s"$valueIsNull = $value == null;" } else { "" @@ -1106,27 +1107,31 @@ case class CreateExternalRow(children: Seq[Expression], schema: StructType) override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val rowClass = classOf[GenericRowWithSchema].getName val values = ctx.freshName("values") - ctx.addMutableState("Object[]", 
values) val childrenCodes = children.zipWithIndex.map { case (e, i) => val eval = e.genCode(ctx) - eval.code + s""" - if (${eval.isNull}) { - $values[$i] = null; - } else { - $values[$i] = ${eval.value}; - } - """ + s""" + |${eval.code} + |if (${eval.isNull}) { + | $values[$i] = null; + |} else { + | $values[$i] = ${eval.value}; + |} + """.stripMargin } - val childrenCode = ctx.splitExpressions(childrenCodes) + val childrenCode = ctx.splitExpressionsWithCurrentInputs( + expressions = childrenCodes, + funcName = "createExternalRow", + extraArguments = "Object[]" -> values :: Nil) val schemaField = ctx.addReferenceObj("schema", schema) - val code = s""" - $values = new Object[${children.size}]; - $childrenCode - final ${classOf[Row].getName} ${ev.value} = new $rowClass($values, $schemaField); - """ + val code = + s""" + |Object[] $values = new Object[${children.size}]; + |$childrenCode + |final ${classOf[Row].getName} ${ev.value} = new $rowClass($values, $schemaField); + """.stripMargin ev.copy(code = code, isNull = "false") } } @@ -1144,25 +1149,28 @@ case class EncodeUsingSerializer(child: Expression, kryo: Boolean) override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { // Code to initialize the serializer. - val serializer = ctx.freshName("serializer") - val (serializerClass, serializerInstanceClass) = { + val (serializer, serializerClass, serializerInstanceClass) = { if (kryo) { - (classOf[KryoSerializer].getName, classOf[KryoSerializerInstance].getName) + ("kryoSerializer", + classOf[KryoSerializer].getName, + classOf[KryoSerializerInstance].getName) } else { - (classOf[JavaSerializer].getName, classOf[JavaSerializerInstance].getName) + ("javaSerializer", + classOf[JavaSerializer].getName, + classOf[JavaSerializerInstance].getName) } } // try conf from env, otherwise create a new one val env = s"${classOf[SparkEnv].getName}.get()" val sparkConf = s"new ${classOf[SparkConf].getName}()" - val serializerInit = s""" - if ($env == null) { - $serializer = ($serializerInstanceClass) new $serializerClass($sparkConf).newInstance(); - } else { - $serializer = ($serializerInstanceClass) new $serializerClass($env.conf()).newInstance(); - } - """ - ctx.addMutableState(serializerInstanceClass, serializer, serializerInit) + ctx.addImmutableStateIfNotExists(serializerInstanceClass, serializer, v => + s""" + |if ($env == null) { + | $v = ($serializerInstanceClass) new $serializerClass($sparkConf).newInstance(); + |} else { + | $v = ($serializerInstanceClass) new $serializerClass($env.conf()).newInstance(); + |} + """.stripMargin) // Code to serialize. val input = child.genCode(ctx) @@ -1190,25 +1198,28 @@ case class DecodeUsingSerializer[T](child: Expression, tag: ClassTag[T], kryo: B override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { // Code to initialize the serializer. 
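// Both serializer expressions now use addImmutableStateIfNotExists with a fixed field
// name, so one generated class holds a single shared serializer instance. A minimal
// sketch of that "declare only once" bookkeeping (hypothetical class, not the real
// CodegenContext):
import scala.collection.mutable
class ImmutableStatePool {
  private val declared = mutable.HashSet[String]()
  def addImmutableStateIfNotExists(
      javaType: String, name: String, init: String => String): Unit = {
    if (declared.add(name)) {
      println(s"private $javaType $name;") // field declaration, emitted once
      println(init(name))                  // one-time initialization statement
    }
  }
}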
- val serializer = ctx.freshName("serializer") - val (serializerClass, serializerInstanceClass) = { + val (serializer, serializerClass, serializerInstanceClass) = { if (kryo) { - (classOf[KryoSerializer].getName, classOf[KryoSerializerInstance].getName) + ("kryoSerializer", + classOf[KryoSerializer].getName, + classOf[KryoSerializerInstance].getName) } else { - (classOf[JavaSerializer].getName, classOf[JavaSerializerInstance].getName) + ("javaSerializer", + classOf[JavaSerializer].getName, + classOf[JavaSerializerInstance].getName) } } // try conf from env, otherwise create a new one val env = s"${classOf[SparkEnv].getName}.get()" val sparkConf = s"new ${classOf[SparkConf].getName}()" - val serializerInit = s""" - if ($env == null) { - $serializer = ($serializerInstanceClass) new $serializerClass($sparkConf).newInstance(); - } else { - $serializer = ($serializerInstanceClass) new $serializerClass($env.conf()).newInstance(); - } - """ - ctx.addMutableState(serializerInstanceClass, serializer, serializerInit) + ctx.addImmutableStateIfNotExists(serializerInstanceClass, serializer, v => + s""" + |if ($env == null) { + | $v = ($serializerInstanceClass) new $serializerClass($sparkConf).newInstance(); + |} else { + | $v = ($serializerInstanceClass) new $serializerClass($env.conf()).newInstance(); + |} + """.stripMargin) // Code to deserialize. val input = child.genCode(ctx) @@ -1244,25 +1255,28 @@ case class InitializeJavaBean(beanInstance: Expression, setters: Map[String, Exp val javaBeanInstance = ctx.freshName("javaBean") val beanInstanceJavaType = ctx.javaType(beanInstance.dataType) - ctx.addMutableState(beanInstanceJavaType, javaBeanInstance) val initialize = setters.map { case (setterMethod, fieldValue) => val fieldGen = fieldValue.genCode(ctx) s""" - ${fieldGen.code} - ${javaBeanInstance}.$setterMethod(${fieldGen.value}); - """ + |${fieldGen.code} + |$javaBeanInstance.$setterMethod(${fieldGen.value}); + """.stripMargin } - val initializeCode = ctx.splitExpressions(initialize.toSeq) + val initializeCode = ctx.splitExpressionsWithCurrentInputs( + expressions = initialize.toSeq, + funcName = "initializeJavaBean", + extraArguments = beanInstanceJavaType -> javaBeanInstance :: Nil) - val code = s""" - ${instanceGen.code} - ${javaBeanInstance} = ${instanceGen.value}; - if (!${instanceGen.isNull}) { - $initializeCode - } - """ + val code = + s""" + |${instanceGen.code} + |$beanInstanceJavaType $javaBeanInstance = ${instanceGen.value}; + |if (!${instanceGen.isNull}) { + | $initializeCode + |} + """.stripMargin ev.copy(code = code, isNull = instanceGen.isNull, value = instanceGen.value) } } @@ -1303,7 +1317,7 @@ case class AssertNotNull(child: Expression, walkedTypePath: Seq[String] = Nil) // Use unnamed reference that doesn't create a local field here to reduce the number of fields // because errMsgField is used only when the value is null. - val errMsgField = ctx.addReferenceMinorObj(errMsg) + val errMsgField = ctx.addReferenceObj("errMsg", errMsg) val code = s""" ${childGen.code} @@ -1340,7 +1354,7 @@ case class GetExternalRowField( override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { // Use unnamed reference that doesn't create a local field here to reduce the number of fields // because errMsgField is used only when the field is null. 
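// The addReferenceMinorObj -> addReferenceObj changes in this file reach the object
// through the generated class's references array instead of declaring a fresh field per
// use. A minimal sketch of the idea (hypothetical class):
import scala.collection.mutable
class ReferenceTable {
  private val references = mutable.ArrayBuffer[Any]()
  // Returns the generated-Java expression that reads the object back out of the array.
  def addReferenceObj(name: String, obj: Any, javaType: String = "Object"): String = {
    references += obj
    s"(($javaType) references[${references.size - 1}] /* $name */)"
  }
}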
- val errMsgField = ctx.addReferenceMinorObj(errMsg) + val errMsgField = ctx.addReferenceObj("errMsg", errMsg) val row = child.genCode(ctx) val code = s""" ${row.code} @@ -1380,7 +1394,7 @@ case class ValidateExternalType(child: Expression, expected: DataType) override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { // Use unnamed reference that doesn't create a local field here to reduce the number of fields // because errMsgField is used only when the type doesn't match. - val errMsgField = ctx.addReferenceMinorObj(errMsg) + val errMsgField = ctx.addReferenceObj("errMsg", errMsg) val input = child.genCode(ctx) val obj = input.value diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index eb7475354b104..b469f5cb7586a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -195,7 +195,7 @@ case class In(value: Expression, list: Seq[Expression]) extends Predicate { } case _ => TypeCheckResult.TypeCheckFailure(s"Arguments must be same type but were: " + - s"${value.dataType} != ${mismatchOpt.get.dataType}") + s"${value.dataType.simpleString} != ${mismatchOpt.get.dataType.simpleString}") } } else { TypeUtils.checkForOrderingExpr(value.dataType, s"function $prettyName") @@ -234,38 +234,66 @@ case class In(value: Expression, list: Seq[Expression]) extends Predicate { } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val javaDataType = ctx.javaType(value.dataType) val valueGen = value.genCode(ctx) val listGen = list.map(_.genCode(ctx)) - ctx.addMutableState(ctx.JAVA_BOOLEAN, ev.value) - ctx.addMutableState(ctx.JAVA_BOOLEAN, ev.isNull) + // inTmpResult has 3 possible values: + // -1 means no matches found and there is at least one value in the list evaluated to null + val HAS_NULL = -1 + // 0 means no matches found and all values in the list are not null + val NOT_MATCHED = 0 + // 1 means one value in the list is matched + val MATCHED = 1 + val tmpResult = ctx.freshName("inTmpResult") val valueArg = ctx.freshName("valueArg") + // All the blocks are meant to be inside a do { ... } while (false); loop. + // The evaluation of variables can be stopped when we find a matching value. 
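// A minimal plain-Scala sketch (hypothetical helper) of the three-valued IN semantics
// that the HAS_NULL / NOT_MATCHED / MATCHED byte states encode:
def inSemantics[T](value: Option[T], list: Seq[Option[T]]): Option[Boolean] =
  value.flatMap { v =>
    if (list.contains(Some(v))) Some(true)  // MATCHED: some candidate equals the value
    else if (list.contains(None)) None      // HAS_NULL: a null candidate makes it unknown
    else Some(false)                        // NOT_MATCHED: no candidate matches
  }
// The generated code computes the same result in one short-circuiting scan.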
val listCode = listGen.map(x => s""" - if (!${ev.value}) { - ${x.code} - if (${x.isNull}) { - ${ev.isNull} = true; - } else if (${ctx.genEqual(value.dataType, valueArg, x.value)}) { - ${ev.isNull} = false; - ${ev.value} = true; - } - } - """) - val listCodes = if (ctx.INPUT_ROW != null && ctx.currentVars == null) { - val args = ("InternalRow", ctx.INPUT_ROW) :: (ctx.javaType(value.dataType), valueArg) :: Nil - ctx.splitExpressions(expressions = listCode, funcName = "valueIn", arguments = args) - } else { - listCode.mkString("\n") - } - ev.copy(code = s""" - ${valueGen.code} - ${ev.value} = false; - ${ev.isNull} = ${valueGen.isNull}; - if (!${ev.isNull}) { - ${ctx.javaType(value.dataType)} $valueArg = ${valueGen.value}; - $listCodes - } - """) + |${x.code} + |if (${x.isNull}) { + | $tmpResult = $HAS_NULL; // ${ev.isNull} = true; + |} else if (${ctx.genEqual(value.dataType, valueArg, x.value)}) { + | $tmpResult = $MATCHED; // ${ev.isNull} = false; ${ev.value} = true; + | continue; + |} + """.stripMargin) + + val codes = ctx.splitExpressionsWithCurrentInputs( + expressions = listCode, + funcName = "valueIn", + extraArguments = (javaDataType, valueArg) :: (ctx.JAVA_BYTE, tmpResult) :: Nil, + returnType = ctx.JAVA_BYTE, + makeSplitFunction = body => + s""" + |do { + | $body + |} while (false); + |return $tmpResult; + """.stripMargin, + foldFunctions = _.map { funcCall => + s""" + |$tmpResult = $funcCall; + |if ($tmpResult == $MATCHED) { + | continue; + |} + """.stripMargin + }.mkString("\n")) + + ev.copy(code = + s""" + |${valueGen.code} + |byte $tmpResult = $HAS_NULL; + |if (!${valueGen.isNull}) { + | $tmpResult = $NOT_MATCHED; + | $javaDataType $valueArg = ${valueGen.value}; + | do { + | $codes + | } while (false); + |} + |final boolean ${ev.isNull} = ($tmpResult == $HAS_NULL); + |final boolean ${ev.value} = ($tmpResult == $MATCHED); + """.stripMargin) } override def sql: String = { @@ -300,7 +328,7 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with } } - @transient private[this] lazy val set = child.dataType match { + @transient lazy val set: Set[Any] = child.dataType match { case _: AtomicType => hset case _: NullType => hset case _ => @@ -308,34 +336,24 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with TreeSet.empty(TypeUtils.getInterpretedOrdering(child.dataType)) ++ hset } - def getSet(): Set[Any] = set - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val setName = classOf[Set[Any]].getName - val InSetName = classOf[InSet].getName + val setTerm = ctx.addReferenceObj("set", set) val childGen = child.genCode(ctx) - ctx.references += this - val setTerm = ctx.freshName("set") - val setNull = if (hasNull) { - s""" - |if (!${ev.value}) { - | ${ev.isNull} = true; - |} - """.stripMargin + val setIsNull = if (hasNull) { + s"${ev.isNull} = !${ev.value};" } else { "" } - ctx.addMutableState(setName, setTerm, - s"$setTerm = (($InSetName)references[${ctx.references.size - 1}]).getSet();") - ev.copy(code = s""" - ${childGen.code} - boolean ${ev.isNull} = ${childGen.isNull}; - boolean ${ev.value} = false; - if (!${ev.isNull}) { - ${ev.value} = $setTerm.contains(${childGen.value}); - $setNull - } - """) + ev.copy(code = + s""" + |${childGen.code} + |${ctx.JAVA_BOOLEAN} ${ev.isNull} = ${childGen.isNull}; + |${ctx.JAVA_BOOLEAN} ${ev.value} = false; + |if (!${ev.isNull}) { + | ${ev.value} = $setTerm.contains(${childGen.value}); + | $setIsNull + |} + """.stripMargin) } override def sql: String = { diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala index b4aefe6cff73e..8bc936fcbfc31 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala @@ -77,9 +77,8 @@ case class Rand(child: Expression) extends RDG { override protected def evalInternal(input: InternalRow): Double = rng.nextDouble() override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val rngTerm = ctx.freshName("rng") val className = classOf[XORShiftRandom].getName - ctx.addMutableState(className, rngTerm) + val rngTerm = ctx.addMutableState(className, "rng") ctx.addPartitionInitializationStatement( s"$rngTerm = new $className(${seed}L + partitionIndex);") ev.copy(code = s""" @@ -112,9 +111,8 @@ case class Randn(child: Expression) extends RDG { override protected def evalInternal(input: InternalRow): Double = rng.nextGaussian() override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val rngTerm = ctx.freshName("rng") val className = classOf[XORShiftRandom].getName - ctx.addMutableState(className, rngTerm) + val rngTerm = ctx.addMutableState(className, "rng") ctx.addPartitionInitializationStatement( s"$rngTerm = new $className(${seed}L + partitionIndex);") ev.copy(code = s""" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index d0d663f63f5db..f3e8f6de58975 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -112,15 +112,14 @@ case class Like(left: Expression, right: Expression) extends StringRegexExpressi override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val patternClass = classOf[Pattern].getName val escapeFunc = StringUtils.getClass.getName.stripSuffix("$") + ".escapeLikeRegex" - val pattern = ctx.freshName("pattern") if (right.foldable) { val rVal = right.eval() if (rVal != null) { val regexStr = StringEscapeUtils.escapeJava(escape(rVal.asInstanceOf[UTF8String].toString())) - ctx.addMutableState(patternClass, pattern, - s"""$pattern = ${patternClass}.compile("$regexStr");""") + val pattern = ctx.addMutableState(patternClass, "patternLike", + v => s"""$v = $patternClass.compile("$regexStr");""") // We don't use nullSafeCodeGen here because we don't want to re-evaluate right again. 
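// For intuition, the foldable branch amounts to hoisting Pattern.compile out of the
// per-row path and into a field initialized once; roughly (a sketch, not the generated
// Java):
import java.util.regex.Pattern
final class LikeMatcher(regexStr: String) {
  private val pattern: Pattern = Pattern.compile(regexStr) // once, like "patternLike"
  def matches(input: String): Boolean = pattern.matcher(input).matches()
}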
val eval = left.genCode(ctx) @@ -139,12 +138,13 @@ case class Like(left: Expression, right: Expression) extends StringRegexExpressi """) } } else { + val pattern = ctx.freshName("pattern") val rightStr = ctx.freshName("rightStr") nullSafeCodeGen(ctx, ev, (eval1, eval2) => { s""" - String $rightStr = ${eval2}.toString(); - ${patternClass} $pattern = ${patternClass}.compile($escapeFunc($rightStr)); - ${ev.value} = $pattern.matcher(${eval1}.toString()).matches(); + String $rightStr = $eval2.toString(); + $patternClass $pattern = $patternClass.compile($escapeFunc($rightStr)); + ${ev.value} = $pattern.matcher($eval1.toString()).matches(); """ }) } @@ -187,15 +187,14 @@ case class RLike(left: Expression, right: Expression) extends StringRegexExpress override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val patternClass = classOf[Pattern].getName - val pattern = ctx.freshName("pattern") if (right.foldable) { val rVal = right.eval() if (rVal != null) { val regexStr = StringEscapeUtils.escapeJava(rVal.asInstanceOf[UTF8String].toString()) - ctx.addMutableState(patternClass, pattern, - s"""$pattern = ${patternClass}.compile("$regexStr");""") + val pattern = ctx.addMutableState(patternClass, "patternRLike", + v => s"""$v = $patternClass.compile("$regexStr");""") // We don't use nullSafeCodeGen here because we don't want to re-evaluate right again. val eval = left.genCode(ctx) @@ -215,11 +214,12 @@ case class RLike(left: Expression, right: Expression) extends StringRegexExpress } } else { val rightStr = ctx.freshName("rightStr") + val pattern = ctx.freshName("pattern") nullSafeCodeGen(ctx, ev, (eval1, eval2) => { s""" - String $rightStr = ${eval2}.toString(); - ${patternClass} $pattern = ${patternClass}.compile($rightStr); - ${ev.value} = $pattern.matcher(${eval1}.toString()).find(0); + String $rightStr = $eval2.toString(); + $patternClass $pattern = $patternClass.compile($rightStr); + ${ev.value} = $pattern.matcher($eval1.toString()).find(0); """ }) } @@ -316,26 +316,17 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio override def prettyName: String = "regexp_replace" override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val termLastRegex = ctx.freshName("lastRegex") - val termPattern = ctx.freshName("pattern") - - val termLastReplacement = ctx.freshName("lastReplacement") - val termLastReplacementInUTF8 = ctx.freshName("lastReplacementInUTF8") - - val termResult = ctx.freshName("result") + val termResult = ctx.freshName("termResult") val classNamePattern = classOf[Pattern].getCanonicalName val classNameStringBuffer = classOf[java.lang.StringBuffer].getCanonicalName val matcher = ctx.freshName("matcher") - ctx.addMutableState("UTF8String", termLastRegex, s"${termLastRegex} = null;") - ctx.addMutableState(classNamePattern, termPattern, s"${termPattern} = null;") - ctx.addMutableState("String", termLastReplacement, s"${termLastReplacement} = null;") - ctx.addMutableState("UTF8String", - termLastReplacementInUTF8, s"${termLastReplacementInUTF8} = null;") - ctx.addMutableState(classNameStringBuffer, - termResult, s"${termResult} = new $classNameStringBuffer();") + val termLastRegex = ctx.addMutableState("UTF8String", "lastRegex") + val termPattern = ctx.addMutableState(classNamePattern, "pattern") + val termLastReplacement = ctx.addMutableState("String", "lastReplacement") + val termLastReplacementInUTF8 = ctx.addMutableState("UTF8String", "lastReplacementInUTF8") val setEvNotNull = if (nullable) { s"${ev.isNull} 
= false;" @@ -345,24 +336,25 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio nullSafeCodeGen(ctx, ev, (subject, regexp, rep) => { s""" - if (!$regexp.equals(${termLastRegex})) { + if (!$regexp.equals($termLastRegex)) { // regex value changed - ${termLastRegex} = $regexp.clone(); - ${termPattern} = ${classNamePattern}.compile(${termLastRegex}.toString()); + $termLastRegex = $regexp.clone(); + $termPattern = $classNamePattern.compile($termLastRegex.toString()); } - if (!$rep.equals(${termLastReplacementInUTF8})) { + if (!$rep.equals($termLastReplacementInUTF8)) { // replacement string changed - ${termLastReplacementInUTF8} = $rep.clone(); - ${termLastReplacement} = ${termLastReplacementInUTF8}.toString(); + $termLastReplacementInUTF8 = $rep.clone(); + $termLastReplacement = $termLastReplacementInUTF8.toString(); } - ${termResult}.delete(0, ${termResult}.length()); - java.util.regex.Matcher ${matcher} = ${termPattern}.matcher($subject.toString()); + $classNameStringBuffer $termResult = new $classNameStringBuffer(); + java.util.regex.Matcher $matcher = $termPattern.matcher($subject.toString()); - while (${matcher}.find()) { - ${matcher}.appendReplacement(${termResult}, ${termLastReplacement}); + while ($matcher.find()) { + $matcher.appendReplacement($termResult, $termLastReplacement); } - ${matcher}.appendTail(${termResult}); - ${ev.value} = UTF8String.fromString(${termResult}.toString()); + $matcher.appendTail($termResult); + ${ev.value} = UTF8String.fromString($termResult.toString()); + $termResult = null; $setEvNotNull """ }) @@ -416,14 +408,12 @@ case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expressio override def prettyName: String = "regexp_extract" override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val termLastRegex = ctx.freshName("lastRegex") - val termPattern = ctx.freshName("pattern") val classNamePattern = classOf[Pattern].getCanonicalName val matcher = ctx.freshName("matcher") val matchResult = ctx.freshName("matchResult") - ctx.addMutableState("UTF8String", termLastRegex, s"${termLastRegex} = null;") - ctx.addMutableState(classNamePattern, termPattern, s"${termPattern} = null;") + val termLastRegex = ctx.addMutableState("UTF8String", "lastRegex") + val termPattern = ctx.addMutableState(classNamePattern, "pattern") val setEvNotNull = if (nullable) { s"${ev.isNull} = false;" @@ -433,19 +423,19 @@ case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expressio nullSafeCodeGen(ctx, ev, (subject, regexp, idx) => { s""" - if (!$regexp.equals(${termLastRegex})) { + if (!$regexp.equals($termLastRegex)) { // regex value changed - ${termLastRegex} = $regexp.clone(); - ${termPattern} = ${classNamePattern}.compile(${termLastRegex}.toString()); + $termLastRegex = $regexp.clone(); + $termPattern = $classNamePattern.compile($termLastRegex.toString()); } - java.util.regex.Matcher ${matcher} = - ${termPattern}.matcher($subject.toString()); - if (${matcher}.find()) { - java.util.regex.MatchResult ${matchResult} = ${matcher}.toMatchResult(); - if (${matchResult}.group($idx) == null) { + java.util.regex.Matcher $matcher = + $termPattern.matcher($subject.toString()); + if ($matcher.find()) { + java.util.regex.MatchResult $matchResult = $matcher.toMatchResult(); + if ($matchResult.group($idx) == null) { ${ev.value} = UTF8String.EMPTY_UTF8; } else { - ${ev.value} = UTF8String.fromString(${matchResult}.group($idx)); + ${ev.value} = UTF8String.fromString($matchResult.group($idx)); } 
$setEvNotNull } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index ee5cf925d3cef..e004bfc6af473 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -24,11 +24,10 @@ import java.util.regex.Pattern import scala.collection.mutable.ArrayBuffer -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.codegen._ -import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData} +import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData, TypeUtils} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{ByteArray, UTF8String} @@ -38,7 +37,8 @@ import org.apache.spark.unsafe.types.{ByteArray, UTF8String} /** - * An expression that concatenates multiple input strings into a single string. + * An expression that concatenates multiple inputs into a single output. + * If all inputs are binary, concat returns an output as binary. Otherwise, it returns as string. * If any input is null, concat returns null. */ @ExpressionDescription( @@ -48,17 +48,37 @@ import org.apache.spark.unsafe.types.{ByteArray, UTF8String} > SELECT _FUNC_('Spark', 'SQL'); SparkSQL """) -case class Concat(children: Seq[Expression]) extends Expression with ImplicitCastInputTypes { +case class Concat(children: Seq[Expression]) extends Expression { - override def inputTypes: Seq[AbstractDataType] = Seq.fill(children.size)(StringType) - override def dataType: DataType = StringType + private lazy val isBinaryMode: Boolean = dataType == BinaryType + + override def checkInputDataTypes(): TypeCheckResult = { + if (children.isEmpty) { + TypeCheckResult.TypeCheckSuccess + } else { + val childTypes = children.map(_.dataType) + if (childTypes.exists(tpe => !Seq(StringType, BinaryType).contains(tpe))) { + return TypeCheckResult.TypeCheckFailure( + s"input to function $prettyName should have StringType or BinaryType, but it's " + + childTypes.map(_.simpleString).mkString("[", ", ", "]")) + } + TypeUtils.checkForSameTypeInputExpr(childTypes, s"function $prettyName") + } + } + + override def dataType: DataType = children.map(_.dataType).headOption.getOrElse(StringType) override def nullable: Boolean = children.exists(_.nullable) override def foldable: Boolean = children.forall(_.foldable) override def eval(input: InternalRow): Any = { - val inputs = children.map(_.eval(input).asInstanceOf[UTF8String]) - UTF8String.concat(inputs : _*) + if (isBinaryMode) { + val inputs = children.map(_.eval(input).asInstanceOf[Array[Byte]]) + ByteArray.concat(inputs: _*) + } else { + val inputs = children.map(_.eval(input).asInstanceOf[UTF8String]) + UTF8String.concat(inputs : _*) + } } override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -73,21 +93,27 @@ case class Concat(children: Seq[Expression]) extends Expression with ImplicitCas } """ } - val codes = if (ctx.INPUT_ROW != null && ctx.currentVars == null) { - ctx.splitExpressions( - expressions = inputs, - funcName = "valueConcat", - arguments = ("InternalRow", ctx.INPUT_ROW) :: ("UTF8String[]", args) :: Nil) + + val (concatenator, initCode) = if (isBinaryMode) { + 
(classOf[ByteArray].getName, s"byte[][] $args = new byte[${evals.length}][];") } else { - inputs.mkString("\n") + ("UTF8String", s"UTF8String[] $args = new UTF8String[${evals.length}];") } + val codes = ctx.splitExpressionsWithCurrentInputs( + expressions = inputs, + funcName = "valueConcat", + extraArguments = (s"${ctx.javaType(dataType)}[]", args) :: Nil) ev.copy(s""" - UTF8String[] $args = new UTF8String[${evals.length}]; + $initCode $codes - UTF8String ${ev.value} = UTF8String.concat($args); + ${ctx.javaType(dataType)} ${ev.value} = $concatenator.concat($args); boolean ${ev.isNull} = ${ev.value} == null; """) } + + override def toString: String = s"concat(${children.mkString(", ")})" + + override def sql: String = s"concat(${children.map(_.sql).mkString(", ")})" } @@ -156,14 +182,10 @@ case class ConcatWs(children: Seq[Expression]) "" } } - val codes = if (ctx.INPUT_ROW != null && ctx.currentVars == null) { - ctx.splitExpressions( + val codes = ctx.splitExpressionsWithCurrentInputs( expressions = inputs, funcName = "valueConcatWs", - arguments = ("InternalRow", ctx.INPUT_ROW) :: ("UTF8String[]", args) :: Nil) - } else { - inputs.mkString("\n") - } + extraArguments = ("UTF8String[]", args) :: Nil) ev.copy(s""" UTF8String[] $args = new UTF8String[$numArgs]; ${separator.code} @@ -208,31 +230,32 @@ case class ConcatWs(children: Seq[Expression]) } }.unzip - val codes = ctx.splitExpressions(evals.map(_.code)) - val varargCounts = ctx.splitExpressions( + val codes = ctx.splitExpressionsWithCurrentInputs(evals.map(_.code)) + + val varargCounts = ctx.splitExpressionsWithCurrentInputs( expressions = varargCount, funcName = "varargCountsConcatWs", - arguments = ("InternalRow", ctx.INPUT_ROW) :: Nil, returnType = "int", makeSplitFunction = body => s""" - int $varargNum = 0; - $body - return $varargNum; - """, - foldFunctions = _.mkString(s"$varargNum += ", s";\n$varargNum += ", ";")) - val varargBuilds = ctx.splitExpressions( + |int $varargNum = 0; + |$body + |return $varargNum; + """.stripMargin, + foldFunctions = _.map(funcCall => s"$varargNum += $funcCall;").mkString("\n")) + + val varargBuilds = ctx.splitExpressionsWithCurrentInputs( expressions = varargBuild, funcName = "varargBuildsConcatWs", - arguments = - ("InternalRow", ctx.INPUT_ROW) :: ("UTF8String []", array) :: ("int", idxInVararg) :: Nil, + extraArguments = ("UTF8String []", array) :: ("int", idxInVararg) :: Nil, returnType = "int", makeSplitFunction = body => s""" - $body - return $idxInVararg; - """, - foldFunctions = _.mkString(s"$idxInVararg = ", s";\n$idxInVararg = ", ";")) + |$body + |return $idxInVararg; + """.stripMargin, + foldFunctions = _.map(funcCall => s"$idxInVararg = $funcCall;").mkString("\n")) + ev.copy( s""" $codes @@ -248,33 +271,45 @@ case class ConcatWs(children: Seq[Expression]) } } +/** + * An expression that returns the `n`-th input in given inputs. + * If all inputs are binary, `elt` returns an output as binary. Otherwise, it returns as string. + * If any input is null, `elt` returns null. + */ // scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(n, str1, str2, ...) - Returns the `n`-th string, e.g., returns `str2` when `n` is 2.", + usage = "_FUNC_(n, input1, input2, ...) 
- Returns the `n`-th input, e.g., returns `input2` when `n` is 2.", examples = """ Examples: > SELECT _FUNC_(1, 'scala', 'java'); scala """) // scalastyle:on line.size.limit -case class Elt(children: Seq[Expression]) - extends Expression with ImplicitCastInputTypes { +case class Elt(children: Seq[Expression]) extends Expression { private lazy val indexExpr = children.head - private lazy val stringExprs = children.tail.toArray + private lazy val inputExprs = children.tail.toArray /** This expression is always nullable because it returns null if index is out of range. */ override def nullable: Boolean = true - override def dataType: DataType = StringType - - override def inputTypes: Seq[DataType] = IntegerType +: Seq.fill(children.size - 1)(StringType) + override def dataType: DataType = inputExprs.map(_.dataType).headOption.getOrElse(StringType) override def checkInputDataTypes(): TypeCheckResult = { if (children.size < 2) { TypeCheckResult.TypeCheckFailure("elt function requires at least two arguments") } else { - super[ImplicitCastInputTypes].checkInputDataTypes() + val (indexType, inputTypes) = (indexExpr.dataType, inputExprs.map(_.dataType)) + if (indexType != IntegerType) { + return TypeCheckResult.TypeCheckFailure(s"first input to function $prettyName should " + + s"have IntegerType, but it's $indexType") + } + if (inputTypes.exists(tpe => !Seq(StringType, BinaryType).contains(tpe))) { + return TypeCheckResult.TypeCheckFailure( + s"input to function $prettyName should have StringType or BinaryType, but it's " + + inputTypes.map(_.simpleString).mkString("[", ", ", "]")) + } + TypeUtils.checkForSameTypeInputExpr(inputTypes, s"function $prettyName") } } @@ -284,65 +319,67 @@ case class Elt(children: Seq[Expression]) null } else { val index = indexObj.asInstanceOf[Int] - if (index <= 0 || index > stringExprs.length) { + if (index <= 0 || index > inputExprs.length) { null } else { - stringExprs(index - 1).eval(input) + inputExprs(index - 1).eval(input) } } } override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val index = indexExpr.genCode(ctx) - val strings = stringExprs.map(_.genCode(ctx)) + val inputs = inputExprs.map(_.genCode(ctx)) val indexVal = ctx.freshName("index") - val stringVal = ctx.freshName("stringVal") - val assignStringValue = strings.zipWithIndex.map { case (eval, index) => - s""" - case ${index + 1}: - ${eval.code} - $stringVal = ${eval.isNull} ? null : ${eval.value}; - break; - """ - } + val indexMatched = ctx.freshName("eltIndexMatched") - val cases = ctx.buildCodeBlocks(assignStringValue) - val codes = if (cases.length == 1) { + val inputVal = ctx.addMutableState(ctx.javaType(dataType), "inputVal") + + val assignInputValue = inputs.zipWithIndex.map { case (eval, index) => s""" - UTF8String $stringVal = null; - switch ($indexVal) { - ${cases.head} - } - """ - } else { - var prevFunc = "null" - for (c <- cases.reverse) { - val funcName = ctx.freshName("eltFunc") - val funcBody = s""" - private UTF8String $funcName(InternalRow ${ctx.INPUT_ROW}, int $indexVal) { - UTF8String $stringVal = null; - switch ($indexVal) { - $c - default: - return $prevFunc; - } - return $stringVal; - } - """ - val fullFuncName = ctx.addNewFunction(funcName, funcBody) - prevFunc = s"$fullFuncName(${ctx.INPUT_ROW}, $indexVal)" - } - s"UTF8String $stringVal = $prevFunc;" + |if ($indexVal == ${index + 1}) { + | ${eval.code} + | $inputVal = ${eval.isNull} ? 
null : ${eval.value}; + | $indexMatched = true; + | continue; + |} + """.stripMargin } + val codes = ctx.splitExpressionsWithCurrentInputs( + expressions = assignInputValue, + funcName = "eltFunc", + extraArguments = ("int", indexVal) :: Nil, + returnType = ctx.JAVA_BOOLEAN, + makeSplitFunction = body => + s""" + |${ctx.JAVA_BOOLEAN} $indexMatched = false; + |do { + | $body + |} while (false); + |return $indexMatched; + """.stripMargin, + foldFunctions = _.map { funcCall => + s""" + |$indexMatched = $funcCall; + |if ($indexMatched) { + | continue; + |} + """.stripMargin + }.mkString) + ev.copy( s""" - ${index.code} - final int $indexVal = ${index.value}; - $codes - UTF8String ${ev.value} = $stringVal; - final boolean ${ev.isNull} = ${ev.value} == null; - """) + |${index.code} + |final int $indexVal = ${index.value}; + |${ctx.JAVA_BOOLEAN} $indexMatched = false; + |$inputVal = null; + |do { + | $codes + |} while (false); + |final ${ctx.javaType(dataType)} ${ev.value} = $inputVal; + |final boolean ${ev.isNull} = ${ev.value} == null; + """.stripMargin) } } @@ -536,14 +573,11 @@ case class StringTranslate(srcExpr: Expression, matchingExpr: Expression, replac } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val termLastMatching = ctx.freshName("lastMatching") - val termLastReplace = ctx.freshName("lastReplace") - val termDict = ctx.freshName("dict") val classNameDict = classOf[JMap[Character, Character]].getCanonicalName - ctx.addMutableState("UTF8String", termLastMatching, s"$termLastMatching = null;") - ctx.addMutableState("UTF8String", termLastReplace, s"$termLastReplace = null;") - ctx.addMutableState(classNameDict, termDict, s"$termDict = null;") + val termLastMatching = ctx.addMutableState("UTF8String", "lastMatching") + val termLastReplace = ctx.addMutableState("UTF8String", "lastReplace") + val termDict = ctx.addMutableState(classNameDict, "dict") nullSafeCodeGen(ctx, ev, (src, matching, replace) => { val check = if (matchingExpr.foldable && replaceExpr.foldable) { @@ -1388,14 +1422,10 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC $argList[$index] = $value; """ } - val argListCodes = if (ctx.INPUT_ROW != null && ctx.currentVars == null) { - ctx.splitExpressions( - expressions = argListCode, - funcName = "valueFormatString", - arguments = ("InternalRow", ctx.INPUT_ROW) :: ("Object[]", argList) :: Nil) - } else { - argListCode.mkString("\n") - } + val argListCodes = ctx.splitExpressionsWithCurrentInputs( + expressions = argListCode, + funcName = "valueFormatString", + extraArguments = ("Object[]", argList) :: Nil) val form = ctx.freshName("formatter") val formatter = classOf[java.util.Formatter].getName @@ -2073,15 +2103,12 @@ case class FormatNumber(x: Expression, d: Expression) // SPARK-13515: US Locale configures the DecimalFormat object to use a dot ('.') // as a decimal separator. 
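A note on the recurring refactor in the string-expression hunks above (Concat, ConcatWs, Elt, FormatString): the hand-rolled `if (ctx.INPUT_ROW != null && ctx.currentVars == null)` guards around `ctx.splitExpressions` are replaced by `splitExpressionsWithCurrentInputs`, which centralizes the decision of when generated statements may be split into private helper methods so that no single generated Java method exceeds the JVM's 64KB bytecode limit. The sketch below is a minimal, self-contained model of that splitting idea; it is not Spark's `CodegenContext` API, and all names are illustrative.

```scala
// Minimal sketch: split a long list of generated Java statements into helper
// methods of at most `chunkSize` statements each, returning the helper
// definitions plus the call-site code that invokes them in order.
object SplitCodegenSketch {
  def splitExpressions(
      expressions: Seq[String],              // generated statements, one per input
      funcName: String,                      // base name for helper methods
      arguments: Seq[(String, String)],      // (java type, name) threaded through
      chunkSize: Int = 100): (Seq[String], String) = {
    val argDecl = arguments.map { case (t, n) => s"$t $n" }.mkString(", ")
    val argCall = arguments.map(_._2).mkString(", ")
    val helpers = expressions.grouped(chunkSize).zipWithIndex.map { case (chunk, i) =>
      s"""private void ${funcName}_$i($argDecl) {
         |  ${chunk.mkString("\n  ")}
         |}""".stripMargin
    }.toSeq
    val callSite = helpers.indices.map(i => s"${funcName}_$i($argCall);").mkString("\n")
    (helpers, callSite)
  }

  def main(args: Array[String]): Unit = {
    val stmts = (0 until 250).map(i => s"array[$i] = compute($i);")
    val (helpers, callSite) = splitExpressions(stmts, "valueConcat", Seq(("UTF8String[]", "array")))
    println(s"${helpers.size} helpers") // 3
    println(callSite)
  }
}
```

The `foldFunctions` hooks seen in the ConcatWs and Elt hunks serve the same purpose at the call site: they decide how the per-chunk helper calls are stitched back together.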
val usLocale = "US" - val lastDValue = ctx.freshName("lastDValue") - val pattern = ctx.freshName("pattern") - val numberFormat = ctx.freshName("numberFormat") val i = ctx.freshName("i") val dFormat = ctx.freshName("dFormat") - ctx.addMutableState(ctx.JAVA_INT, lastDValue, s"$lastDValue = -100;") - ctx.addMutableState(sb, pattern, s"$pattern = new $sb();") - ctx.addMutableState(df, numberFormat, - s"""$numberFormat = new $df("", new $dfs($l.$usLocale));""") + val lastDValue = ctx.addMutableState(ctx.JAVA_INT, "lastDValue", v => s"$v = -100;") + val pattern = ctx.addMutableState(sb, "pattern", v => s"$v = new $sb();") + val numberFormat = ctx.addMutableState(df, "numberFormat", + v => s"""$v = new $df("", new $dfs($l.$usLocale));""") s""" if ($d >= 0) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index e11e3a105f597..dd13d9a3bba51 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -70,9 +70,9 @@ case class WindowSpecDefinition( case f: SpecifiedWindowFrame if f.frameType == RangeFrame && f.isValueBound && !isValidFrameType(f.valueBoundary.head.dataType) => TypeCheckFailure( - s"The data type '${orderSpec.head.dataType}' used in the order specification does " + - s"not match the data type '${f.valueBoundary.head.dataType}' which is used in the " + - "range frame.") + s"The data type '${orderSpec.head.dataType.simpleString}' used in the order " + + "specification does not match the data type " + + s"'${f.valueBoundary.head.dataType.simpleString}' which is used in the range frame.") case _ => TypeCheckSuccess } } @@ -251,8 +251,8 @@ case class SpecifiedWindowFrame( TypeCheckFailure(s"Window frame $location bound '$e' is not a literal.") case e: Expression if !frameType.inputType.acceptsType(e.dataType) => TypeCheckFailure( - s"The data type of the $location bound '${e.dataType} does not match " + - s"the expected data type '${frameType.inputType}'.") + s"The data type of the $location bound '${e.dataType.simpleString}' does not match " + + s"the expected data type '${frameType.inputType.simpleString}'.") case _ => TypeCheckSuccess } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 0d961bf2e6e5e..c794ba8619322 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -47,7 +47,62 @@ abstract class Optimizer(sessionCatalog: SessionCatalog) protected def fixedPoint = FixedPoint(SQLConf.get.optimizerMaxIterations) def batches: Seq[Batch] = { - Batch("Eliminate Distinct", Once, EliminateDistinct) :: + val operatorOptimizationRuleSet = + Seq( + // Operator push down + PushProjectionThroughUnion, + ReorderJoin, + EliminateOuterJoin, + PushPredicateThroughJoin, + PushDownPredicate, + LimitPushDown, + ColumnPruning, + InferFiltersFromConstraints, + // Operator combine + CollapseRepartition, + CollapseProject, + CollapseWindow, + CombineFilters, + CombineLimits, + CombineUnions, + // Constant folding and strength reduction + NullPropagation, + ConstantPropagation, + FoldablePropagation, + OptimizeIn, + ConstantFolding, + 
ReorderAssociativeOperator, + LikeSimplification, + BooleanSimplification, + SimplifyConditionals, + RemoveDispensableExpressions, + SimplifyBinaryComparison, + PruneFilters, + EliminateSorts, + SimplifyCasts, + SimplifyCaseConversionExpressions, + RewriteCorrelatedScalarSubquery, + EliminateSerialization, + RemoveRedundantAliases, + RemoveRedundantProject, + SimplifyCreateStructOps, + SimplifyCreateArrayOps, + SimplifyCreateMapOps, + CombineConcats) ++ + extendedOperatorOptimizationRules + + val operatorOptimizationBatch: Seq[Batch] = { + val rulesWithoutInferFiltersFromConstraints = + operatorOptimizationRuleSet.filterNot(_ == InferFiltersFromConstraints) + Batch("Operator Optimization before Inferring Filters", fixedPoint, + rulesWithoutInferFiltersFromConstraints: _*) :: + Batch("Infer Filters", Once, + InferFiltersFromConstraints) :: + Batch("Operator Optimization after Inferring Filters", fixedPoint, + rulesWithoutInferFiltersFromConstraints: _*) :: Nil + } + + (Batch("Eliminate Distinct", Once, EliminateDistinct) :: // Technically some of the rules in Finish Analysis are not optimizer rules and belong more // in the analyzer, because they are needed for correctness (e.g. ComputeCurrentTime). // However, because we also use the analyzer to canonicalized queries (for view definition), @@ -81,66 +136,26 @@ abstract class Optimizer(sessionCatalog: SessionCatalog) ReplaceDistinctWithAggregate) :: Batch("Aggregate", fixedPoint, RemoveLiteralFromGroupExpressions, - RemoveRepetitionFromGroupExpressions) :: - Batch("Operator Optimizations", fixedPoint, Seq( - // Operator push down - PushProjectionThroughUnion, - ReorderJoin, - EliminateOuterJoin, - InferFiltersFromConstraints, - BooleanSimplification, - PushPredicateThroughJoin, - PushDownPredicate, - LimitPushDown, - ColumnPruning, - // Operator combine - CollapseRepartition, - CollapseProject, - CollapseWindow, - CombineFilters, - CombineLimits, - CombineUnions, - // Constant folding and strength reduction - NullPropagation, - ConstantPropagation, - FoldablePropagation, - OptimizeIn, - ConstantFolding, - ReorderAssociativeOperator, - LikeSimplification, - BooleanSimplification, - SimplifyConditionals, - RemoveDispensableExpressions, - SimplifyBinaryComparison, - PruneFilters, - EliminateSorts, - SimplifyCasts, - SimplifyCaseConversionExpressions, - RewriteCorrelatedScalarSubquery, - EliminateSerialization, - RemoveRedundantAliases, - RemoveRedundantProject, - SimplifyCreateStructOps, - SimplifyCreateArrayOps, - SimplifyCreateMapOps, - CombineConcats) ++ - extendedOperatorOptimizationRules: _*) :: + RemoveRepetitionFromGroupExpressions) :: Nil ++ + operatorOptimizationBatch) :+ Batch("Join Reorder", Once, - CostBasedJoinReorder) :: + CostBasedJoinReorder) :+ Batch("Decimal Optimizations", fixedPoint, - DecimalAggregates) :: + DecimalAggregates) :+ Batch("Object Expressions Optimization", fixedPoint, EliminateMapObjects, - CombineTypedFilters) :: + CombineTypedFilters) :+ Batch("LocalRelation", fixedPoint, ConvertToLocalRelation, - PropagateEmptyRelation) :: + PropagateEmptyRelation) :+ // The following batch should be executed after batch "Join Reorder" and "LocalRelation". 
Batch("Check Cartesian Products", Once, - CheckCartesianProducts) :: + CheckCartesianProducts) :+ Batch("RewriteSubquery", Once, RewritePredicateSubquery, - CollapseProject) :: Nil + ColumnPruning, + CollapseProject, + RemoveRedundantProject) } /** @@ -441,12 +456,15 @@ object ColumnPruning extends Rule[LogicalPlan] { f.copy(child = prunedChild(child, f.references)) case e @ Expand(_, _, child) if (child.outputSet -- e.references).nonEmpty => e.copy(child = prunedChild(child, e.references)) - case g: Generate if !g.join && (g.child.outputSet -- g.references).nonEmpty => - g.copy(child = prunedChild(g.child, g.references)) - // Turn off `join` for Generate if no column from it's child is used - case p @ Project(_, g: Generate) if g.join && p.references.subsetOf(g.generatedSet) => - p.copy(child = g.copy(join = false)) + // prune unrequired references + case p @ Project(_, g: Generate) if p.references != g.outputSet => + val requiredAttrs = p.references -- g.producedAttributes ++ g.generator.references + val newChild = prunedChild(g.child, requiredAttrs) + val unrequired = g.generator.references -- p.references + val unrequiredIndices = newChild.output.zipWithIndex.filter(t => unrequired.contains(t._1)) + .map(_._2) + p.copy(child = g.copy(child = newChild, unrequiredChildIndex = unrequiredIndices)) // Eliminate unneeded attributes from right side of a Left Existence Join. case j @ Join(_, right, LeftExistence(_), _) => @@ -692,7 +710,9 @@ object CombineUnions extends Rule[LogicalPlan] { */ object CombineFilters extends Rule[LogicalPlan] with PredicateHelper { def apply(plan: LogicalPlan): LogicalPlan = plan transform { - case Filter(fc, nf @ Filter(nc, grandChild)) => + // The query execution/optimization does not guarantee the expressions are evaluated in order. + // We only can combine them if and only if both are deterministic. + case Filter(fc, nf @ Filter(nc, grandChild)) if fc.deterministic && nc.deterministic => (ExpressionSet(splitConjunctivePredicates(fc)) -- ExpressionSet(splitConjunctivePredicates(nc))).reduceOption(And) match { case Some(ac) => @@ -775,7 +795,8 @@ object PushDownPredicate extends Rule[LogicalPlan] with PredicateHelper { project.copy(child = Filter(replaceAlias(condition, aliasMap), grandChild)) case filter @ Filter(condition, aggregate: Aggregate) - if aggregate.aggregateExpressions.forall(_.deterministic) => + if aggregate.aggregateExpressions.forall(_.deterministic) + && aggregate.groupingExpressions.nonEmpty => // Find all the aliased expressions in the aggregate list that don't include any actual // AggregateExpression, and create a map from the alias to the expression val aliasMap = AttributeMap(aggregate.aggregateExpressions.collect { @@ -785,15 +806,15 @@ object PushDownPredicate extends Rule[LogicalPlan] with PredicateHelper { // For each filter, expand the alias and check if the filter can be evaluated using // attributes produced by the aggregate operator's child operator. 
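The ColumnPruning case above is the optimizer half of SPARK-21657: instead of turning off `join` on `Generate`, it records which child columns are unrequired (`unrequiredChildIndex`) so they can be dropped before generation. A plain-collections illustration of why this matters for `explode(array(...))` followed by a projection that keeps only the exploded element (names are hypothetical):

```scala
// Hypothetical illustration of SPARK-21657 with plain collections. When a
// projection keeps only the exploded element, keeping the original array in
// Generate's output drags a copy of it along for every element: O(n^2) memory.
object GeneratePruningSketch {
  final case class Row(arr: Seq[Int])

  // Old behavior: Generate outputs (child columns ++ generator output).
  def explodeKeepingChild(rows: Seq[Row]): Seq[(Seq[Int], Int)] =
    rows.flatMap(r => r.arr.map(e => (r.arr, e)))

  // New behavior: the unrequired child column is pruned before generation.
  def explodePruned(rows: Seq[Row]): Seq[Int] =
    rows.flatMap(_.arr)

  def main(args: Array[String]): Unit = {
    val rows = Seq(Row(1 to 4), Row(Seq(5, 6)))
    println(explodeKeepingChild(rows).size) // 6 tuples, each carrying the array
    println(explodePruned(rows))            // List(1, 2, 3, 4, 5, 6)
  }
}
```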
- val (candidates, containingNonDeterministic) = - splitConjunctivePredicates(condition).span(_.deterministic) + val (candidates, nonDeterministic) = + splitConjunctivePredicates(condition).partition(_.deterministic) val (pushDown, rest) = candidates.partition { cond => val replaced = replaceAlias(cond, aliasMap) cond.references.nonEmpty && replaced.references.subsetOf(aggregate.child.outputSet) } - val stayUp = rest ++ containingNonDeterministic + val stayUp = rest ++ nonDeterministic if (pushDown.nonEmpty) { val pushDownPredicate = pushDown.reduce(And) @@ -815,14 +836,14 @@ object PushDownPredicate extends Rule[LogicalPlan] with PredicateHelper { if w.partitionSpec.forall(_.isInstanceOf[AttributeReference]) => val partitionAttrs = AttributeSet(w.partitionSpec.flatMap(_.references)) - val (candidates, containingNonDeterministic) = - splitConjunctivePredicates(condition).span(_.deterministic) + val (candidates, nonDeterministic) = + splitConjunctivePredicates(condition).partition(_.deterministic) val (pushDown, rest) = candidates.partition { cond => cond.references.subsetOf(partitionAttrs) } - val stayUp = rest ++ containingNonDeterministic + val stayUp = rest ++ nonDeterministic if (pushDown.nonEmpty) { val pushDownPredicate = pushDown.reduce(And) @@ -834,7 +855,7 @@ object PushDownPredicate extends Rule[LogicalPlan] with PredicateHelper { case filter @ Filter(condition, union: Union) => // Union could change the rows, so non-deterministic predicate can't be pushed down - val (pushDown, stayUp) = splitConjunctivePredicates(condition).span(_.deterministic) + val (pushDown, stayUp) = splitConjunctivePredicates(condition).partition(_.deterministic) if (pushDown.nonEmpty) { val pushDownCond = pushDown.reduceLeft(And) @@ -858,13 +879,9 @@ object PushDownPredicate extends Rule[LogicalPlan] with PredicateHelper { } case filter @ Filter(condition, watermark: EventTimeWatermark) => - // We can only push deterministic predicates which don't reference the watermark attribute. - // We could in theory span() only on determinism and pull out deterministic predicates - // on the watermark separately. But it seems unnecessary and a bit confusing to not simply - // use the prefix as we do for nondeterminism in other cases. - - val (pushDown, stayUp) = splitConjunctivePredicates(condition).span( - p => p.deterministic && !p.references.contains(watermark.eventTime)) + val (pushDown, stayUp) = splitConjunctivePredicates(condition).partition { p => + p.deterministic && !p.references.contains(watermark.eventTime) + } if (pushDown.nonEmpty) { val pushDownPredicate = pushDown.reduceLeft(And) @@ -905,14 +922,14 @@ object PushDownPredicate extends Rule[LogicalPlan] with PredicateHelper { // come from grandchild. // TODO: non-deterministic predicates could be pushed through some operators that do not change // the rows. 
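The `span` to `partition` switch above (and in the following hunks) is a behavior fix, not a rename: `span(_.deterministic)` stops at the first non-deterministic predicate, stranding every deterministic predicate that happens to come after it, while `partition(_.deterministic)` collects them all. Pushing a deterministic predicate past an unrelated non-deterministic one does not change its result, so partitioning is safe. A small demonstration:

```scala
// span vs partition on a predicate list: span strands deterministic
// predicates that follow a non-deterministic one; partition keeps them all.
object SpanVsPartition {
  final case class Pred(sql: String, deterministic: Boolean)

  def main(args: Array[String]): Unit = {
    val preds = Seq(
      Pred("a = 1", deterministic = true),
      Pred("rand() < 0.5", deterministic = false),
      Pred("b = 2", deterministic = true))

    val (spanPush, _) = preds.span(_.deterministic)
    println(spanPush.map(_.sql)) // List(a = 1)        -- "b = 2" is stranded
    val (partPush, _) = preds.partition(_.deterministic)
    println(partPush.map(_.sql)) // List(a = 1, b = 2) -- both are candidates
  }
}
```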
- val (candidates, containingNonDeterministic) = - splitConjunctivePredicates(filter.condition).span(_.deterministic) + val (candidates, nonDeterministic) = + splitConjunctivePredicates(filter.condition).partition(_.deterministic) val (pushDown, rest) = candidates.partition { cond => cond.references.subsetOf(grandchild.outputSet) } - val stayUp = rest ++ containingNonDeterministic + val stayUp = rest ++ nonDeterministic if (pushDown.nonEmpty) { val newChild = insertFilter(pushDown.reduceLeft(And)) @@ -955,23 +972,19 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { /** * Splits join condition expressions or filter predicates (on a given join's output) into three * categories based on the attributes required to evaluate them. Note that we explicitly exclude - * on-deterministic (i.e., stateful) condition expressions in canEvaluateInLeft or + * non-deterministic (i.e., stateful) condition expressions in canEvaluateInLeft or * canEvaluateInRight to prevent pushing these predicates on either side of the join. * * @return (canEvaluateInLeft, canEvaluateInRight, haveToEvaluateInBoth) */ private def split(condition: Seq[Expression], left: LogicalPlan, right: LogicalPlan) = { - // Note: In order to ensure correctness, it's important to not change the relative ordering of - // any deterministic expression that follows a non-deterministic expression. To achieve this, - // we only consider pushing down those expressions that precede the first non-deterministic - // expression in the condition. - val (pushDownCandidates, containingNonDeterministic) = condition.span(_.deterministic) + val (pushDownCandidates, nonDeterministic) = condition.partition(_.deterministic) val (leftEvaluateCondition, rest) = pushDownCandidates.partition(_.references.subsetOf(left.outputSet)) val (rightEvaluateCondition, commonCondition) = rest.partition(expr => expr.references.subsetOf(right.outputSet)) - (leftEvaluateCondition, rightEvaluateCondition, commonCondition ++ containingNonDeterministic) + (leftEvaluateCondition, rightEvaluateCondition, commonCondition ++ nonDeterministic) } def apply(plan: LogicalPlan): LogicalPlan = plan transform { @@ -1209,7 +1222,13 @@ object ReplaceDeduplicateWithAggregate extends Rule[LogicalPlan] { Alias(new First(attr).toAggregateExpression(), attr.name)(attr.exprId) } } - Aggregate(keys, aggCols, child) + // SPARK-22951: Physical aggregate operators distinguishes global aggregation and grouping + // aggregations by checking the number of grouping keys. The key difference here is that a + // global aggregation always returns at least one row even if there are no input rows. Here + // we append a literal when the grouping key list is empty so that the result aggregate + // operator is properly treated as a grouping aggregation. 
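The SPARK-22951 comment above is applied by the `Literal(1)` grouping key in the next hunk. The distinction it relies on can be reproduced with plain collections: a global aggregate always emits one row, a grouping aggregate emits one row per group, and deduplication of an empty input must emit zero rows:

```scala
// Plain-collections contrast between global and grouping aggregation,
// motivating the Literal(1) grouping key appended just below (SPARK-22951).
object GlobalVsGroupingAgg {
  def main(args: Array[String]): Unit = {
    val empty = Seq.empty[Int]

    // Global aggregation (no grouping keys): always exactly one output row.
    println(Seq(empty.sum)) // List(0)

    // Grouping aggregation with a constant key: one row per group, and an
    // empty input produces zero groups, hence zero rows.
    println(empty.groupBy(_ => 1).map { case (_, vs) => vs.sum }) // empty

    // Deduplication must behave like the grouping case:
    println(Seq(3, 3, 4).groupBy(_ => 1).values.flatMap(_.distinct)) // List(3, 4)
  }
}
```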
+ val nonemptyKeys = if (keys.isEmpty) Literal(1) :: Nil else keys + Aggregate(nonemptyKeys, aggCols, child) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala index 52fbb4df2f58e..a6e5aa6daca65 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala @@ -41,6 +41,10 @@ object PropagateEmptyRelation extends Rule[LogicalPlan] with PredicateHelper { private def empty(plan: LogicalPlan) = LocalRelation(plan.output, data = Seq.empty, isStreaming = plan.isStreaming) + // Construct a project list from plan's output, while the value is always NULL. + private def nullValueProjectList(plan: LogicalPlan): Seq[NamedExpression] = + plan.output.map{ a => Alias(Literal(null), a.name)(a.exprId) } + def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case p: Union if p.children.forall(isEmptyLocalRelation) => empty(p) @@ -49,16 +53,28 @@ object PropagateEmptyRelation extends Rule[LogicalPlan] with PredicateHelper { // as stateful streaming joins need to perform other state management operations other than // just processing the input data. case p @ Join(_, _, joinType, _) - if !p.children.exists(_.isStreaming) && p.children.exists(isEmptyLocalRelation) => - joinType match { - case _: InnerLike => empty(p) - // Intersect is handled as LeftSemi by `ReplaceIntersectWithSemiJoin` rule. - // Except is handled as LeftAnti by `ReplaceExceptWithAntiJoin` rule. - case LeftOuter | LeftSemi | LeftAnti if isEmptyLocalRelation(p.left) => empty(p) - case RightOuter if isEmptyLocalRelation(p.right) => empty(p) - case FullOuter if p.children.forall(isEmptyLocalRelation) => empty(p) - case _ => p - } + if !p.children.exists(_.isStreaming) => + val isLeftEmpty = isEmptyLocalRelation(p.left) + val isRightEmpty = isEmptyLocalRelation(p.right) + if (isLeftEmpty || isRightEmpty) { + joinType match { + case _: InnerLike => empty(p) + // Intersect is handled as LeftSemi by `ReplaceIntersectWithSemiJoin` rule. + // Except is handled as LeftAnti by `ReplaceExceptWithAntiJoin` rule. 
+ case LeftOuter | LeftSemi | LeftAnti if isLeftEmpty => empty(p) + case LeftSemi if isRightEmpty => empty(p) + case LeftAnti if isRightEmpty => p.left + case FullOuter if isLeftEmpty && isRightEmpty => empty(p) + case LeftOuter | FullOuter if isRightEmpty => + Project(p.left.output ++ nullValueProjectList(p.right), p.left) + case RightOuter if isRightEmpty => empty(p) + case RightOuter | FullOuter if isLeftEmpty => + Project(nullValueProjectList(p.left) ++ p.right.output, p.right) + case _ => p + } + } else { + p + } case p: UnaryNode if p.children.nonEmpty && p.children.forall(isEmptyLocalRelation) => p match { case _: Project => empty(p) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 785e815b41185..1c0b7bd806801 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -21,6 +21,7 @@ import scala.collection.immutable.HashSet import scala.collection.mutable.{ArrayBuffer, Stack} import org.apache.spark.sql.catalyst.analysis._ +import org.apache.spark.sql.catalyst.analysis.TypeCoercion.ImplicitTypeCasts import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.expressions.aggregate._ @@ -64,49 +65,89 @@ object ConstantFolding extends Rule[LogicalPlan] { * }}} * * Approach used: - * - Start from AND operator as the root - * - Get all the children conjunctive predicates which are EqualTo / EqualNullSafe such that they - * don't have a `NOT` or `OR` operator in them * - Populate a mapping of attribute => constant value by looking at all the equals predicates * - Using this mapping, replace occurrence of the attributes with the corresponding constant values * in the AND node. 
 */
 object ConstantPropagation extends Rule[LogicalPlan] with PredicateHelper {
-  private def containsNonConjunctionPredicates(expression: Expression): Boolean = expression.find {
-    case _: Not | _: Or => true
-    case _ => false
-  }.isDefined
-
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
-    case f: Filter => f transformExpressionsUp {
-      case and: And =>
-        val conjunctivePredicates =
-          splitConjunctivePredicates(and)
-            .filter(expr => expr.isInstanceOf[EqualTo] || expr.isInstanceOf[EqualNullSafe])
-            .filterNot(expr => containsNonConjunctionPredicates(expr))
-
-        val equalityPredicates = conjunctivePredicates.collect {
-          case e @ EqualTo(left: AttributeReference, right: Literal) => ((left, right), e)
-          case e @ EqualTo(left: Literal, right: AttributeReference) => ((right, left), e)
-          case e @ EqualNullSafe(left: AttributeReference, right: Literal) => ((left, right), e)
-          case e @ EqualNullSafe(left: Literal, right: AttributeReference) => ((right, left), e)
-        }
+    case f: Filter =>
+      val (newCondition, _) = traverse(f.condition, replaceChildren = true)
+      if (newCondition.isDefined) {
+        f.copy(condition = newCondition.get)
+      } else {
+        f
+      }
+  }
 
-        val constantsMap = AttributeMap(equalityPredicates.map(_._1))
-        val predicates = equalityPredicates.map(_._2).toSet
+  type EqualityPredicates = Seq[((AttributeReference, Literal), BinaryComparison)]
 
-        def replaceConstants(expression: Expression) = expression transform {
-          case a: AttributeReference =>
-            constantsMap.get(a) match {
-              case Some(literal) => literal
-              case None => a
-            }
+  /**
+   * Traverse a condition as a tree and replace attributes with constant values.
+   * - On matching [[And]], recursively traverse each child and collect the propagated mappings.
+   *   If the current node is not a child of another [[And]], replace all occurrences of the
+   *   attributes with the corresponding constant values.
+   * - If a child of [[And]] is [[EqualTo]] or [[EqualNullSafe]], propagate the mapping
+   *   of attribute => constant.
+   * - On matching [[Or]] or [[Not]], recursively traverse each child and propagate an empty
+   *   mapping.
+   * - Otherwise, stop the traversal and propagate an empty mapping.
+   * @param condition condition to be traversed
+   * @param replaceChildren whether to replace attributes with constant values in children
+   * @return A tuple including:
+   *         1. Option[Expression]: optional changed condition after traversal
+   *         2.
EqualityPredicates: propagated mapping of attribute => constant + */ + private def traverse(condition: Expression, replaceChildren: Boolean) + : (Option[Expression], EqualityPredicates) = + condition match { + case e @ EqualTo(left: AttributeReference, right: Literal) => (None, Seq(((left, right), e))) + case e @ EqualTo(left: Literal, right: AttributeReference) => (None, Seq(((right, left), e))) + case e @ EqualNullSafe(left: AttributeReference, right: Literal) => + (None, Seq(((left, right), e))) + case e @ EqualNullSafe(left: Literal, right: AttributeReference) => + (None, Seq(((right, left), e))) + case a: And => + val (newLeft, equalityPredicatesLeft) = traverse(a.left, replaceChildren = false) + val (newRight, equalityPredicatesRight) = traverse(a.right, replaceChildren = false) + val equalityPredicates = equalityPredicatesLeft ++ equalityPredicatesRight + val newSelf = if (equalityPredicates.nonEmpty && replaceChildren) { + Some(And(replaceConstants(newLeft.getOrElse(a.left), equalityPredicates), + replaceConstants(newRight.getOrElse(a.right), equalityPredicates))) + } else { + if (newLeft.isDefined || newRight.isDefined) { + Some(And(newLeft.getOrElse(a.left), newRight.getOrElse(a.right))) + } else { + None + } } - - and transform { - case e @ EqualTo(_, _) if !predicates.contains(e) => replaceConstants(e) - case e @ EqualNullSafe(_, _) if !predicates.contains(e) => replaceConstants(e) + (newSelf, equalityPredicates) + case o: Or => + // Ignore the EqualityPredicates from children since they are only propagated through And. + val (newLeft, _) = traverse(o.left, replaceChildren = true) + val (newRight, _) = traverse(o.right, replaceChildren = true) + val newSelf = if (newLeft.isDefined || newRight.isDefined) { + Some(Or(left = newLeft.getOrElse(o.left), right = newRight.getOrElse((o.right)))) + } else { + None } + (newSelf, Seq.empty) + case n: Not => + // Ignore the EqualityPredicates from children since they are only propagated through And. + val (newChild, _) = traverse(n.child, replaceChildren = true) + (newChild.map(Not), Seq.empty) + case _ => (None, Seq.empty) + } + + private def replaceConstants(condition: Expression, equalityPredicates: EqualityPredicates) + : Expression = { + val constantsMap = AttributeMap(equalityPredicates.map(_._1)) + val predicates = equalityPredicates.map(_._2).toSet + def replaceConstants0(expression: Expression) = expression transform { + case a: AttributeReference => constantsMap.getOrElse(a, a) + } + condition transform { + case e @ EqualTo(_, _) if !predicates.contains(e) => replaceConstants0(e) + case e @ EqualNullSafe(_, _) if !predicates.contains(e) => replaceConstants0(e) } } } @@ -465,18 +506,21 @@ object NullPropagation extends Rule[LogicalPlan] { /** - * Propagate foldable expressions: * Replace attributes with aliases of the original foldable expressions if possible. - * Other optimizations will take advantage of the propagated foldable expressions. - * + * Other optimizations will take advantage of the propagated foldable expressions. For example, + * this rule can optimize * {{{ * SELECT 1.0 x, 'abc' y, Now() z ORDER BY x, y, 3 - * ==> SELECT 1.0 x, 'abc' y, Now() z ORDER BY 1.0, 'abc', Now() * }}} + * to + * {{{ + * SELECT 1.0 x, 'abc' y, Now() z ORDER BY 1.0, 'abc', Now() + * }}} + * and other rules can further optimize it and remove the ORDER BY operator. 
 */
 object FoldablePropagation extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = {
-    val foldableMap = AttributeMap(plan.flatMap {
+    var foldableMap = AttributeMap(plan.flatMap {
       case Project(projectList, _) => projectList.collect {
         case a: Alias if a.child.foldable => (a.toAttribute, a)
       }
@@ -489,38 +533,44 @@ object FoldablePropagation extends Rule[LogicalPlan] {
     if (foldableMap.isEmpty) {
       plan
     } else {
-      var stop = false
       CleanupAliases(plan.transformUp {
-        // A leaf node should not stop the folding process (note that we are traversing up the
-        // tree, starting at the leaf nodes); so we are allowing it.
-        case l: LeafNode =>
-          l
-
         // We can only propagate foldables for a subset of unary nodes.
-        case u: UnaryNode if !stop && canPropagateFoldables(u) =>
+        case u: UnaryNode if foldableMap.nonEmpty && canPropagateFoldables(u) =>
           u.transformExpressions(replaceFoldable)
 
-        // Allow inner joins. We do not allow outer join, although its output attributes are
-        // derived from its children, they are actually different attributes: the output of outer
-        // join is not always picked from its children, but can also be null.
+        // Join derives its output attributes from its children, but they are actually not the
+        // same attributes. For example, the output of an outer join is not always picked from
+        // its children, but can also be null. We should exclude these mis-derived attributes
+        // when propagating the foldable expressions.
         // TODO(cloud-fan): It seems more reasonable to use new attributes as the output attributes
         // of outer join.
-        case j @ Join(_, _, Inner, _) if !stop =>
-          j.transformExpressions(replaceFoldable)
-
-        // We can fold the projections an expand holds. However expand changes the output columns
-        // and often reuses the underlying attributes; so we cannot assume that a column is still
-        // foldable after the expand has been applied.
-        // TODO(hvanhovell): Expand should use new attributes as the output attributes.
-        case expand: Expand if !stop =>
-          val newExpand = expand.copy(projections = expand.projections.map { projection =>
+        case j @ Join(left, right, joinType, _) if foldableMap.nonEmpty =>
+          val newJoin = j.transformExpressions(replaceFoldable)
+          val missDerivedAttrsSet: AttributeSet = AttributeSet(joinType match {
+            case _: InnerLike | LeftExistence(_) => Nil
+            case LeftOuter => right.output
+            case RightOuter => left.output
+            case FullOuter => left.output ++ right.output
+          })
+          foldableMap = AttributeMap(foldableMap.baseMap.values.filterNot {
+            case (attr, _) => missDerivedAttrsSet.contains(attr)
+          }.toSeq)
+          newJoin
+
+        // We cannot replace the attributes in `Expand.output`. If there are other non-leaf
+        // operators that have the `output` field, we should put them here too.
+        case expand: Expand if foldableMap.nonEmpty =>
+          expand.copy(projections = expand.projections.map { projection =>
             projection.map(_.transform(replaceFoldable))
           })
-          stop = true
-          newExpand
 
-        case other =>
-          stop = true
+        // Other plans are not safe for foldable propagation, and they should not propagate
+        // foldable expressions from their children.
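The `Join` case above prunes mis-derived attributes from `foldableMap` before the catch-all case that follows. The reason: after an outer join, a column that was a foldable alias on one side can surface as NULL for non-matching rows, so substituting the literal would be wrong. A rough analogue with a left outer join over plain collections:

```scala
// Why foldable literals must not be propagated across the null-producing side
// of an outer join, modeled with a left outer join over plain collections.
object OuterJoinFoldable {
  def main(args: Array[String]): Unit = {
    val left = Seq(1, 2, 3)
    // Pretend the right side is SELECT 1 AS k, 'x' AS v, so v is a foldable alias.
    val right = Map(1 -> "x")

    val joined = left.map(k => (k, right.get(k)))
    println(joined) // List((1,Some(x)), (2,None), (3,None))

    // Rewriting every reference to v as the literal "x" would wrongly turn the
    // None (NULL) rows into "x", which is why the rule drops v from foldableMap.
  }
}
```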
+ case other if foldableMap.nonEmpty => + val childrenOutputSet = AttributeSet(other.children.flatMap(_.output)) + foldableMap = AttributeMap(foldableMap.baseMap.values.filterNot { + case (attr, _) => childrenOutputSet.contains(attr) + }.toSeq) other }) } @@ -574,7 +624,6 @@ object SimplifyCasts extends Rule[LogicalPlan] { object RemoveDispensableExpressions extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { case UnaryPositive(child) => child - case PromotePrecision(child) => child } } @@ -606,6 +655,12 @@ object CombineConcats extends Rule[LogicalPlan] { stack.pop() match { case Concat(children) => stack.pushAll(children.reverse) + // If `spark.sql.function.concatBinaryAsString` is false, nested `Concat` exprs possibly + // have `Concat`s with binary output. Since `TypeCoercion` casts them into strings, + // we need to handle the case to combine all nested `Concat`s. + case c @ Cast(Concat(children), StringType, _) => + val newChildren = children.map { e => c.copy(child = e) } + stack.pushAll(newChildren.reverse) case child => flattened += child } @@ -613,8 +668,14 @@ object CombineConcats extends Rule[LogicalPlan] { Concat(flattened) } + private def hasNestedConcats(concat: Concat): Boolean = concat.children.exists { + case c: Concat => true + case c @ Cast(Concat(children), StringType, _) => true + case _ => false + } + def apply(plan: LogicalPlan): LogicalPlan = plan.transformExpressionsDown { - case concat: Concat if concat.children.exists(_.isInstanceOf[Concat]) => + case concat: Concat if hasNestedConcats(concat) => flattenConcats(concat) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index 64b28565eb27c..2673bea648d09 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -270,7 +270,7 @@ object PullupCorrelatedPredicates extends Rule[LogicalPlan] with PredicateHelper /** * Pull up the correlated predicates and rewrite all subqueries in an operator tree.. */ - def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { + def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case f @ Filter(_, a: Aggregate) => rewriteSubQueries(f, Seq(a, a.child)) // Only a few unary nodes (Project/Filter/Aggregate) can contain subqueries. 
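The `CombineConcats` hunk above extends flattening to `Cast(Concat(...), StringType)` children produced by `TypeCoercion` when `spark.sql.function.concatBinaryAsString` is false. The stack-based flattening itself can be modeled compactly; the sketch below uses simplified stand-in types, not Spark's expression classes:

```scala
// A small model of the CombineConcats flattening: nested concat calls are
// walked with an explicit stack and collapsed into one flat argument list.
import scala.collection.mutable

object FlattenConcatSketch {
  sealed trait Expr
  final case class Lit(s: String) extends Expr
  final case class Concat(children: Seq[Expr]) extends Expr

  def flattenConcat(root: Concat): Concat = {
    val stack = mutable.Stack[Expr](root)
    val flattened = mutable.ArrayBuffer.empty[Expr]
    while (stack.nonEmpty) {
      stack.pop() match {
        // Reverse so the first child ends up on top of the stack,
        // preserving left-to-right order in the flattened result.
        case Concat(children) => stack.pushAll(children.reverse)
        case leaf => flattened += leaf
      }
    }
    Concat(flattened)
  }

  def main(args: Array[String]): Unit = {
    val nested = Concat(Seq(Lit("a"), Concat(Seq(Lit("b"), Concat(Seq(Lit("c")))))))
    println(flattenConcat(nested)) // Concat(List(Lit(a), Lit(b), Lit(c)))
  }
}
```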
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 7651d11ee65a8..bdc357d54a878 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -623,7 +623,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging val expressions = expressionList(ctx.expression) Generate( UnresolvedGenerator(visitFunctionName(ctx.qualifiedName), expressions), - join = true, + unrequiredChildIndex = Nil, outer = ctx.OUTER != null, Some(ctx.tblName.getText.toLowerCase), ctx.colName.asScala.map(_.getText).map(UnresolvedAttribute.apply), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala index 9b127f91648e6..89347f4b1f7bf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala @@ -16,6 +16,8 @@ */ package org.apache.spark.sql.catalyst.parser +import java.util + import scala.collection.mutable.StringBuilder import org.antlr.v4.runtime.{ParserRuleContext, Token} @@ -39,6 +41,13 @@ object ParserUtils { throw new ParseException(s"Operation not allowed: $message", ctx) } + def checkDuplicateClauses[T]( + nodes: util.List[T], clauseName: String, ctx: ParserRuleContext): Unit = { + if (nodes.size() > 1) { + throw new ParseException(s"Found duplicate clauses: $clauseName", ctx) + } + } + /** Check if duplicate keys exist in a set of key-value pairs. */ def checkDuplicateKeys[T](keyPairs: Seq[(String, T)], ctx: ParserRuleContext): Unit = { keyPairs.groupBy(_._1).filter(_._2.size > 1).foreach { case (key, _) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/EventTimeWatermark.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/EventTimeWatermark.scala index 06196b5afb031..7a927e1e083b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/EventTimeWatermark.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/EventTimeWatermark.scala @@ -38,7 +38,7 @@ object EventTimeWatermark { case class EventTimeWatermark( eventTime: Attribute, delay: CalendarInterval, - child: LogicalPlan) extends LogicalPlan { + child: LogicalPlan) extends UnaryNode { // Update the metadata on the eventTime column to include the desired delay. override val output: Seq[Attribute] = child.output.map { a => @@ -60,6 +60,4 @@ case class EventTimeWatermark( a } } - - override val children: Seq[LogicalPlan] = child :: Nil } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 14188829db2af..ff2a0ec588567 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -33,58 +33,9 @@ abstract class LogicalPlan with QueryPlanConstraints with Logging { - private var _analyzed: Boolean = false - - /** - * Marks this plan as already analyzed. This should only be called by [[CheckAnalysis]]. 
- */ - private[catalyst] def setAnalyzed(): Unit = { _analyzed = true } - - /** - * Returns true if this node and its children have already been gone through analysis and - * verification. Note that this is only an optimization used to avoid analyzing trees that - * have already been analyzed, and can be reset by transformations. - */ - def analyzed: Boolean = _analyzed - /** Returns true if this subtree has data from a streaming data source. */ def isStreaming: Boolean = children.exists(_.isStreaming == true) - /** - * Returns a copy of this node where `rule` has been recursively applied first to all of its - * children and then itself (post-order). When `rule` does not apply to a given node, it is left - * unchanged. This function is similar to `transformUp`, but skips sub-trees that have already - * been marked as analyzed. - * - * @param rule the function use to transform this nodes children - */ - def resolveOperators(rule: PartialFunction[LogicalPlan, LogicalPlan]): LogicalPlan = { - if (!analyzed) { - val afterRuleOnChildren = mapChildren(_.resolveOperators(rule)) - if (this fastEquals afterRuleOnChildren) { - CurrentOrigin.withOrigin(origin) { - rule.applyOrElse(this, identity[LogicalPlan]) - } - } else { - CurrentOrigin.withOrigin(origin) { - rule.applyOrElse(afterRuleOnChildren, identity[LogicalPlan]) - } - } - } else { - this - } - } - - /** - * Recursively transforms the expressions of a tree, skipping nodes that have already - * been analyzed. - */ - def resolveExpressions(r: PartialFunction[Expression, Expression]): LogicalPlan = { - this resolveOperators { - case p => p.transformExpressions(r) - } - } - override def verboseStringWithSuffix: String = { super.verboseString + statsCache.map(", " + _.toString).getOrElse("") } @@ -296,6 +247,8 @@ abstract class UnaryNode extends LogicalPlan { protected def getAliasedConstraints(projectList: Seq[NamedExpression]): Set[Expression] = { var allConstraints = child.constraints.asInstanceOf[Set[Expression]] projectList.foreach { + case a @ Alias(l: Literal, _) => + allConstraints += EqualTo(a.toAttribute, l) case a @ Alias(e, _) => // For every alias in `projectList`, replace the reference in constraints by its attribute. allConstraints ++= allConstraints.map(_ transform { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala index b0f611fd38dea..9c0a30a47f839 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala @@ -98,7 +98,7 @@ trait QueryPlanConstraints { self: LogicalPlan => // we may avoid producing recursive constraints. private lazy val aliasMap: AttributeMap[Expression] = AttributeMap( expressions.collect { - case a: Alias => (a.toAttribute, a.child) + case a: Alias if !a.child.isInstanceOf[Literal] => (a.toAttribute, a.child) } ++ children.flatMap(_.asInstanceOf[QueryPlanConstraints].aliasMap)) // Note: the explicit cast is necessary, since Scala compiler fails to infer the type. 
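The last two hunks work as a pair: `getAliasedConstraints` now turns a literal alias such as `SELECT 1 AS a` into the constraint `a = 1`, while `aliasMap` skips literal-child aliases so that the same equality is not rewritten back through the alias map, which could produce recursive constraints. A rough model of the new collection step (simplified stand-in types, not Spark's expression classes):

```scala
// Simplified model of collecting an equality constraint from a literal alias.
object LiteralAliasConstraints {
  sealed trait Expr
  final case class Attr(name: String) extends Expr
  final case class Lit(value: Any) extends Expr
  final case class Alias(child: Expr, name: String) extends Expr
  final case class EqualTo(left: Expr, right: Expr) extends Expr

  def aliasedConstraints(projectList: Seq[Expr]): Set[Expr] =
    projectList.collect {
      // The literal-alias case added above: SELECT 1 AS a yields a = 1, and
      // such aliases are deliberately left out of aliasMap afterwards.
      case Alias(l: Lit, name) => EqualTo(Attr(name), l)
    }.toSet

  def main(args: Array[String]): Unit = {
    println(aliasedConstraints(Seq(Alias(Lit(1), "a"), Attr("b"))))
    // Set(EqualTo(Attr(a),Lit(1)))
  }
}
```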
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index c2750c3079814..a4fca790dd086 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -22,7 +22,8 @@ import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
 import org.apache.spark.sql.catalyst.plans._
-import org.apache.spark.sql.catalyst.plans.logical.statsEstimation._
+import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning,
+  RangePartitioning, RoundRobinPartitioning}
 import org.apache.spark.sql.types._
 import org.apache.spark.util.Utils
 import org.apache.spark.util.random.RandomSampler
@@ -72,8 +73,13 @@ case class Project(projectList: Seq[NamedExpression], child: LogicalPlan) extend
  * their output.
  *
  * @param generator the generator expression
- * @param join  when true, each output row is implicitly joined with the input tuple that produced
- *              it.
+ * @param unrequiredChildIndex this parameter starts as Nil and gets filled by the Optimizer.
+ *                             It is used as an optimization for omitting data generation that will
+ *                             be discarded next by a projection.
+ *                             A common use case is when we explode(array(..)) and are interested
+ *                             only in the exploded data, not in the original array. Before this
+ *                             optimization the array was duplicated for each of its elements,
+ *                             causing O(n^2) memory consumption. (see [SPARK-21657])
 * @param outer when true, each input row will be output at least once, even if the output of the
 *              given `generator` is empty.
 * @param qualifier Qualifier for the attributes of generator(UDTF)
@@ -82,15 +88,17 @@ case class Project(projectList: Seq[NamedExpression], child: LogicalPlan) extend
 */
 case class Generate(
     generator: Generator,
-    join: Boolean,
+    unrequiredChildIndex: Seq[Int],
     outer: Boolean,
     qualifier: Option[String],
     generatorOutput: Seq[Attribute],
     child: LogicalPlan) extends UnaryNode {
 
-  /** The set of all attributes produced by this node. */
-  def generatedSet: AttributeSet = AttributeSet(generatorOutput)
+  lazy val requiredChildOutput: Seq[Attribute] = {
+    val unrequiredSet = unrequiredChildIndex.toSet
+    child.output.zipWithIndex.filterNot(t => unrequiredSet.contains(t._2)).map(_._1)
+  }
 
   override lazy val resolved: Boolean = {
     generator.resolved &&
@@ -113,9 +121,7 @@ case class Generate(
     nullableOutput
   }
 
-  def output: Seq[Attribute] = {
-    if (join) child.output ++ qualifiedGeneratorOutput else qualifiedGeneratorOutput
-  }
+  def output: Seq[Attribute] = requiredChildOutput ++ qualifiedGeneratorOutput
 }
 
 case class Filter(condition: Expression, child: LogicalPlan)
@@ -838,6 +844,27 @@ case class RepartitionByExpression(
 
   require(numPartitions > 0, s"Number of partitions ($numPartitions) must be positive.")
 
+  val partitioning: Partitioning = {
+    val (sortOrder, nonSortOrder) = partitionExpressions.partition(_.isInstanceOf[SortOrder])
+
+    require(sortOrder.isEmpty || nonSortOrder.isEmpty,
+      s"${getClass.getSimpleName} expects that either all its `partitionExpressions` are of type " +
+        "`SortOrder`, which means `RangePartitioning`, or none of them are `SortOrder`, which " +
+        "means `HashPartitioning`. In this case we have:" +
+        s"""
+           |SortOrder: $sortOrder
+           |NonSortOrder: $nonSortOrder
+         """.stripMargin)
+
+    if (sortOrder.nonEmpty) {
+      RangePartitioning(sortOrder.map(_.asInstanceOf[SortOrder]), numPartitions)
+    } else if (nonSortOrder.nonEmpty) {
+      HashPartitioning(nonSortOrder, numPartitions)
+    } else {
+      RoundRobinPartitioning(numPartitions)
+    }
+  }
+
   override def maxRows: Option[Long] = child.maxRows
   override def shuffle: Boolean = true
 }
@@ -861,3 +888,23 @@ case class Deduplicate(
 
   override def output: Seq[Attribute] = child.output
 }
+
+/**
+ * A logical plan node that sets an analysis barrier.
+ *
+ * The SQL Analyzer goes through a whole query plan even when most of it has already been
+ * analyzed. This increases the time spent on query analysis, especially for long pipelines
+ * in ML.
+ *
+ * This logical plan wraps an analyzed logical plan to prevent it from being analyzed again.
+ * The barrier is applied to the analyzed logical plan in Dataset. It does not change the
+ * output of the wrapped logical plan; it just acts as a wrapper that hides the plan from the
+ * analyzer. New operations on the dataset will be put on top of the barrier, so only the
+ * newly created nodes will be analyzed.
+ *
+ * This analysis barrier will be removed at the end of the analysis stage.
+ */ +case class AnalysisBarrier(child: LogicalPlan) extends LeafNode { + override protected def innerChildren: Seq[LogicalPlan] = Seq(child) + override def output: Seq[Attribute] = child.output + override def isStreaming: Boolean = child.isStreaming + override def doCanonicalize(): LogicalPlan = child.canonicalized +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/EstimationUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/EstimationUtils.scala index 9c34a9b7aa756..d793f77413d18 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/EstimationUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/EstimationUtils.scala @@ -17,10 +17,11 @@ package org.apache.spark.sql.catalyst.plans.logical.statsEstimation +import scala.collection.mutable.ArrayBuffer import scala.math.BigDecimal.RoundingMode import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap} -import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, LogicalPlan, Statistics} +import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.types.{DecimalType, _} @@ -88,30 +89,296 @@ object EstimationUtils { } /** - * For simplicity we use Decimal to unify operations for data types whose min/max values can be + * For simplicity we use Double to unify operations for data types whose min/max values can be * represented as numbers, e.g. Boolean can be represented as 0 (false) or 1 (true). * The two methods below are the contract of conversion. */ - def toDecimal(value: Any, dataType: DataType): Decimal = { + def toDouble(value: Any, dataType: DataType): Double = { dataType match { - case _: NumericType | DateType | TimestampType => Decimal(value.toString) - case BooleanType => if (value.asInstanceOf[Boolean]) Decimal(1) else Decimal(0) + case _: NumericType | DateType | TimestampType => value.toString.toDouble + case BooleanType => if (value.asInstanceOf[Boolean]) 1 else 0 } } - def fromDecimal(dec: Decimal, dataType: DataType): Any = { + def fromDouble(double: Double, dataType: DataType): Any = { dataType match { - case BooleanType => dec.toLong == 1 - case DateType => dec.toInt - case TimestampType => dec.toLong - case ByteType => dec.toByte - case ShortType => dec.toShort - case IntegerType => dec.toInt - case LongType => dec.toLong - case FloatType => dec.toFloat - case DoubleType => dec.toDouble - case _: DecimalType => dec + case BooleanType => double.toInt == 1 + case DateType => double.toInt + case TimestampType => double.toLong + case ByteType => double.toByte + case ShortType => double.toShort + case IntegerType => double.toInt + case LongType => double.toLong + case FloatType => double.toFloat + case DoubleType => double + case _: DecimalType => Decimal(double) } } + /** + * Returns the index of the first bin into which the given value falls for a specified + * numeric equi-height histogram. + */ + private def findFirstBinForValue(value: Double, bins: Array[HistogramBin]): Int = { + var i = 0 + while ((i < bins.length) && (value > bins(i).hi)) { + i += 1 + } + i + } + + /** + * Returns the index of the last bin into which the given value falls for a specified + * numeric equi-height histogram. 
+   */
+  private def findLastBinForValue(value: Double, bins: Array[HistogramBin]): Int = {
+    var i = bins.length - 1
+    while ((i >= 0) && (value < bins(i).lo)) {
+      i -= 1
+    }
+    i
+  }
+
+  /**
+   * Returns the possibility of the given histogram bin holding values within the given range
+   * [lowerBound, upperBound].
+   */
+  private def binHoldingRangePossibility(
+      upperBound: Double,
+      lowerBound: Double,
+      bin: HistogramBin): Double = {
+    assert(bin.lo <= lowerBound && lowerBound <= upperBound && upperBound <= bin.hi)
+    if (bin.hi == bin.lo) {
+      // the entire bin is covered in the range
+      1.0
+    } else if (upperBound == lowerBound) {
+      // set percentage to 1/NDV
+      1.0 / bin.ndv.toDouble
+    } else {
+      // Use proration since the range falls inside this bin.
+      math.min((upperBound - lowerBound) / (bin.hi - bin.lo), 1.0)
+    }
+  }
+
+  /**
+   * Returns the number of histogram bins holding values within the given range
+   * [lowerBound, upperBound].
+   *
+   * Note that the returned value is of double type, because the range boundaries usually occupy a
+   * portion of a bin. An extreme case is [value, value], which is generated by the equality
+   * predicate `col = value`; we can get higher accuracy by allowing a fractional number of
+   * histogram bins to be returned.
+   *
+   * @param upperBound the highest value of the given range
+   * @param upperBoundInclusive whether the upperBound is included in the range
+   * @param lowerBound the lowest value of the given range
+   * @param lowerBoundInclusive whether the lowerBound is included in the range
+   * @param bins an array of bins for a given numeric equi-height histogram
+   */
+  def numBinsHoldingRange(
+      upperBound: Double,
+      upperBoundInclusive: Boolean,
+      lowerBound: Double,
+      lowerBoundInclusive: Boolean,
+      bins: Array[HistogramBin]): Double = {
+    assert(bins.head.lo <= lowerBound && lowerBound <= upperBound && upperBound <= bins.last.hi,
+      "Given range does not fit in the given histogram.")
+    assert(upperBound != lowerBound || upperBoundInclusive || lowerBoundInclusive,
+      s"'$lowerBound < value < $upperBound' is an invalid range.")
+
+    val upperBinIndex = if (upperBoundInclusive) {
+      findLastBinForValue(upperBound, bins)
+    } else {
+      findFirstBinForValue(upperBound, bins)
+    }
+    val lowerBinIndex = if (lowerBoundInclusive) {
+      findFirstBinForValue(lowerBound, bins)
+    } else {
+      findLastBinForValue(lowerBound, bins)
+    }
+    assert(lowerBinIndex <= upperBinIndex, "Invalid histogram data.")
+
+    if (lowerBinIndex == upperBinIndex) {
+      binHoldingRangePossibility(upperBound, lowerBound, bins(lowerBinIndex))
+    } else {
+      // Computes the occupied portion of bins of the upperBound and lowerBound.
+      val lowerBin = bins(lowerBinIndex)
+      val lowerPart = binHoldingRangePossibility(lowerBin.hi, lowerBound, lowerBin)
+
+      val higherBin = bins(upperBinIndex)
+      val higherPart = binHoldingRangePossibility(upperBound, higherBin.lo, higherBin)
+
+      // The total number of bins is lowerPart + higherPart + bins between them
+      lowerPart + higherPart + upperBinIndex - lowerBinIndex - 1
+    }
+  }
+
+  /**
+   * Returns overlapped ranges between two histograms, in the given value range
+   * [lowerBound, upperBound].
+   */
+  def getOverlappedRanges(
+      leftHistogram: Histogram,
+      rightHistogram: Histogram,
+      lowerBound: Double,
+      upperBound: Double): Seq[OverlappedRange] = {
+    val overlappedRanges = new ArrayBuffer[OverlappedRange]()
+    // Only bins whose range intersects [lowerBound, upperBound] have join possibility.
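(Aside: a worked example of `numBinsHoldingRange` above, re-implemented standalone on a toy 4-bin equi-height histogram over [0, 100]; `BinSketch` is a hypothetical stand-in for `HistogramBin`.)

object NumBinsSketch {
  case class BinSketch(lo: Double, hi: Double, ndv: Long)

  def main(args: Array[String]): Unit = {
    val bins = Array(BinSketch(0, 25, 10), BinSketch(25, 50, 10),
      BinSketch(50, 75, 10), BinSketch(75, 100, 10))
    // Range [10, 60]: 60% of bin 0, all of bin 1, and 40% of bin 2 => 2.0 bins in total.
    val lowerPart = (25.0 - 10.0) / (25.0 - 0.0)   // 0.6, occupied portion of bin 0
    val higherPart = (60.0 - 50.0) / (75.0 - 50.0) // 0.4, occupied portion of bin 2
    val middleBins = 1.0                           // bin 1 is fully covered
    println(lowerPart + middleBins + higherPart)   // 2.0
  }
}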
+ val leftBins = leftHistogram.bins + .filter(b => b.lo <= upperBound && b.hi >= lowerBound) + val rightBins = rightHistogram.bins + .filter(b => b.lo <= upperBound && b.hi >= lowerBound) + + leftBins.foreach { lb => + rightBins.foreach { rb => + val (left, leftHeight) = trimBin(lb, leftHistogram.height, lowerBound, upperBound) + val (right, rightHeight) = trimBin(rb, rightHistogram.height, lowerBound, upperBound) + // Only collect overlapped ranges. + if (left.lo <= right.hi && left.hi >= right.lo) { + // Collect overlapped ranges. + val range = if (right.lo >= left.lo && right.hi >= left.hi) { + // Case1: the left bin is "smaller" than the right bin + // left.lo right.lo left.hi right.hi + // --------+------------------+------------+----------------+-------> + if (left.hi == right.lo) { + // The overlapped range has only one value. + OverlappedRange( + lo = right.lo, + hi = right.lo, + leftNdv = 1, + rightNdv = 1, + leftNumRows = leftHeight / left.ndv, + rightNumRows = rightHeight / right.ndv + ) + } else { + val leftRatio = (left.hi - right.lo) / (left.hi - left.lo) + val rightRatio = (left.hi - right.lo) / (right.hi - right.lo) + OverlappedRange( + lo = right.lo, + hi = left.hi, + leftNdv = left.ndv * leftRatio, + rightNdv = right.ndv * rightRatio, + leftNumRows = leftHeight * leftRatio, + rightNumRows = rightHeight * rightRatio + ) + } + } else if (right.lo <= left.lo && right.hi <= left.hi) { + // Case2: the left bin is "larger" than the right bin + // right.lo left.lo right.hi left.hi + // --------+------------------+------------+----------------+-------> + if (right.hi == left.lo) { + // The overlapped range has only one value. + OverlappedRange( + lo = right.hi, + hi = right.hi, + leftNdv = 1, + rightNdv = 1, + leftNumRows = leftHeight / left.ndv, + rightNumRows = rightHeight / right.ndv + ) + } else { + val leftRatio = (right.hi - left.lo) / (left.hi - left.lo) + val rightRatio = (right.hi - left.lo) / (right.hi - right.lo) + OverlappedRange( + lo = left.lo, + hi = right.hi, + leftNdv = left.ndv * leftRatio, + rightNdv = right.ndv * rightRatio, + leftNumRows = leftHeight * leftRatio, + rightNumRows = rightHeight * rightRatio + ) + } + } else if (right.lo >= left.lo && right.hi <= left.hi) { + // Case3: the left bin contains the right bin + // left.lo right.lo right.hi left.hi + // --------+------------------+------------+----------------+-------> + val leftRatio = (right.hi - right.lo) / (left.hi - left.lo) + OverlappedRange( + lo = right.lo, + hi = right.hi, + leftNdv = left.ndv * leftRatio, + rightNdv = right.ndv, + leftNumRows = leftHeight * leftRatio, + rightNumRows = rightHeight + ) + } else { + assert(right.lo <= left.lo && right.hi >= left.hi) + // Case4: the right bin contains the left bin + // right.lo left.lo left.hi right.hi + // --------+------------------+------------+----------------+-------> + val rightRatio = (left.hi - left.lo) / (right.hi - right.lo) + OverlappedRange( + lo = left.lo, + hi = left.hi, + leftNdv = left.ndv, + rightNdv = right.ndv * rightRatio, + leftNumRows = leftHeight, + rightNumRows = rightHeight * rightRatio + ) + } + overlappedRanges += range + } + } + } + overlappedRanges + } + + /** + * Given an original bin and a value range [lowerBound, upperBound], returns the trimmed part + * of the bin in that range and its number of rows. + * @param bin the input histogram bin. + * @param height the number of rows of the given histogram bin inside an equi-height histogram. + * @param lowerBound lower bound of the given range. 
+ * @param upperBound upper bound of the given range. + * @return trimmed part of the given bin and its number of rows. + */ + def trimBin(bin: HistogramBin, height: Double, lowerBound: Double, upperBound: Double) + : (HistogramBin, Double) = { + val (lo, hi) = if (bin.lo <= lowerBound && bin.hi >= upperBound) { + // bin.lo lowerBound upperBound bin.hi + // --------+------------------+------------+-------------+-------> + (lowerBound, upperBound) + } else if (bin.lo <= lowerBound && bin.hi >= lowerBound) { + // bin.lo lowerBound bin.hi upperBound + // --------+------------------+------------+-------------+-------> + (lowerBound, bin.hi) + } else if (bin.lo <= upperBound && bin.hi >= upperBound) { + // lowerBound bin.lo upperBound bin.hi + // --------+------------------+------------+-------------+-------> + (bin.lo, upperBound) + } else { + // lowerBound bin.lo bin.hi upperBound + // --------+------------------+------------+-------------+-------> + assert(bin.lo >= lowerBound && bin.hi <= upperBound) + (bin.lo, bin.hi) + } + + if (hi == lo) { + // Note that bin.hi == bin.lo also falls into this branch. + (HistogramBin(lo, hi, 1), height / bin.ndv) + } else { + assert(bin.hi != bin.lo) + val ratio = (hi - lo) / (bin.hi - bin.lo) + (HistogramBin(lo, hi, math.ceil(bin.ndv * ratio).toLong), height * ratio) + } + } + + /** + * A join between two equi-height histograms may produce multiple overlapped ranges. + * Each overlapped range is produced by a part of one bin in the left histogram and a part of + * one bin in the right histogram. + * @param lo lower bound of this overlapped range. + * @param hi higher bound of this overlapped range. + * @param leftNdv ndv in the left part. + * @param rightNdv ndv in the right part. + * @param leftNumRows number of rows in the left part. + * @param rightNumRows number of rows in the right part. + */ + case class OverlappedRange( + lo: Double, + hi: Double, + leftNdv: Double, + rightNdv: Double, + leftNumRows: Double, + rightNumRows: Double) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala index 74820eb97d081..4cc32de2d32d7 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala @@ -23,7 +23,7 @@ import scala.collection.mutable import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} -import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, Filter, LeafNode, Statistics} +import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils._ import org.apache.spark.sql.types._ @@ -31,7 +31,7 @@ case class FilterEstimation(plan: Filter) extends Logging { private val childStats = plan.child.stats - private val colStatsMap = new ColumnStatsMap(childStats.attributeStats) + private val colStatsMap = ColumnStatsMap(childStats.attributeStats) /** * Returns an option of Statistics for a Filter logical plan node. @@ -47,7 +47,7 @@ case class FilterEstimation(plan: Filter) extends Logging { // Estimate selectivity of this filter predicate, and update column stats if needed. 
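(Aside: to make `trimBin` above concrete, a standalone sketch of its proration branch; `BinSketch` is a hypothetical stand-in for `HistogramBin`.)

object TrimBinSketch {
  case class BinSketch(lo: Double, hi: Double, ndv: Long)

  // The non-degenerate branch of trimBin: scale ndv and row count by the kept width ratio.
  def trim(bin: BinSketch, height: Double, lo: Double, hi: Double): (BinSketch, Double) = {
    val ratio = (hi - lo) / (bin.hi - bin.lo)
    (BinSketch(lo, hi, math.ceil(bin.ndv * ratio).toLong), height * ratio)
  }

  def main(args: Array[String]): Unit = {
    // Trimming a bin [0, 100] (ndv = 50, 1000 rows) to [40, 100] keeps 60% of everything.
    println(trim(BinSketch(0, 100, 50), height = 1000.0, lo = 40, hi = 100))
    // (BinSketch(40.0,100.0,30),600.0)
  }
}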
// For not-supported condition, set filter selectivity to a conservative estimate 100% - val filterSelectivity = calculateFilterSelectivity(plan.condition).getOrElse(BigDecimal(1)) + val filterSelectivity = calculateFilterSelectivity(plan.condition).getOrElse(1.0) val filteredRowCount: BigInt = ceil(BigDecimal(childStats.rowCount.get) * filterSelectivity) val newColStats = if (filteredRowCount == 0) { @@ -79,17 +79,16 @@ case class FilterEstimation(plan: Filter) extends Logging { * @return an optional double value to show the percentage of rows meeting a given condition. * It returns None if the condition is not supported. */ - def calculateFilterSelectivity(condition: Expression, update: Boolean = true) - : Option[BigDecimal] = { + def calculateFilterSelectivity(condition: Expression, update: Boolean = true): Option[Double] = { condition match { case And(cond1, cond2) => - val percent1 = calculateFilterSelectivity(cond1, update).getOrElse(BigDecimal(1)) - val percent2 = calculateFilterSelectivity(cond2, update).getOrElse(BigDecimal(1)) + val percent1 = calculateFilterSelectivity(cond1, update).getOrElse(1.0) + val percent2 = calculateFilterSelectivity(cond2, update).getOrElse(1.0) Some(percent1 * percent2) case Or(cond1, cond2) => - val percent1 = calculateFilterSelectivity(cond1, update = false).getOrElse(BigDecimal(1)) - val percent2 = calculateFilterSelectivity(cond2, update = false).getOrElse(BigDecimal(1)) + val percent1 = calculateFilterSelectivity(cond1, update = false).getOrElse(1.0) + val percent2 = calculateFilterSelectivity(cond2, update = false).getOrElse(1.0) Some(percent1 + percent2 - (percent1 * percent2)) // Not-operator pushdown @@ -131,7 +130,7 @@ case class FilterEstimation(plan: Filter) extends Logging { * @return an optional double value to show the percentage of rows meeting a given condition. * It returns None if the condition is not supported. */ - def calculateSingleCondition(condition: Expression, update: Boolean): Option[BigDecimal] = { + def calculateSingleCondition(condition: Expression, update: Boolean): Option[Double] = { condition match { case l: Literal => evaluateLiteral(l) @@ -225,17 +224,17 @@ case class FilterEstimation(plan: Filter) extends Logging { def evaluateNullCheck( attr: Attribute, isNull: Boolean, - update: Boolean): Option[BigDecimal] = { + update: Boolean): Option[Double] = { if (!colStatsMap.contains(attr)) { logDebug("[CBO] No statistics for " + attr) return None } val colStat = colStatsMap(attr) val rowCountValue = childStats.rowCount.get - val nullPercent: BigDecimal = if (rowCountValue == 0) { + val nullPercent: Double = if (rowCountValue == 0) { 0 } else { - BigDecimal(colStat.nullCount) / BigDecimal(rowCountValue) + (BigDecimal(colStat.nullCount) / BigDecimal(rowCountValue)).toDouble } if (update) { @@ -265,13 +264,13 @@ case class FilterEstimation(plan: Filter) extends Logging { * @param update a boolean flag to specify if we need to update ColumnStat of a given column * for subsequent conditions * @return an optional double value to show the percentage of rows meeting a given condition - * It returns None if no statistics exists for a given column or wrong value. + * It returns None if no statistics exists for a given column or wrong value. 
*/ def evaluateBinary( op: BinaryComparison, attr: Attribute, literal: Literal, - update: Boolean): Option[BigDecimal] = { + update: Boolean): Option[Double] = { if (!colStatsMap.contains(attr)) { logDebug("[CBO] No statistics for " + attr) return None @@ -305,13 +304,12 @@ case class FilterEstimation(plan: Filter) extends Logging { def evaluateEquality( attr: Attribute, literal: Literal, - update: Boolean): Option[BigDecimal] = { + update: Boolean): Option[Double] = { if (!colStatsMap.contains(attr)) { logDebug("[CBO] No statistics for " + attr) return None } val colStat = colStatsMap(attr) - val ndv = colStat.distinctCount // decide if the value is in [min, max] of the column. // We currently don't store min/max for binary/string type. @@ -332,11 +330,16 @@ case class FilterEstimation(plan: Filter) extends Logging { colStatsMap.update(attr, newStats) } - Some(1.0 / BigDecimal(ndv)) - } else { + if (colStat.histogram.isEmpty) { + // returns 1/ndv if there is no histogram + Some(1.0 / colStat.distinctCount.toDouble) + } else { + Some(computeEqualityPossibilityByHistogram(literal, colStat)) + } + + } else { // not in interval Some(0.0) } - } /** @@ -349,7 +352,7 @@ case class FilterEstimation(plan: Filter) extends Logging { * @param literal a literal value (or constant) * @return an optional double value to show the percentage of rows meeting a given condition */ - def evaluateLiteral(literal: Literal): Option[BigDecimal] = { + def evaluateLiteral(literal: Literal): Option[Double] = { literal match { case Literal(null, _) => Some(0.0) case FalseLiteral => Some(0.0) @@ -374,7 +377,7 @@ case class FilterEstimation(plan: Filter) extends Logging { def evaluateInSet( attr: Attribute, hSet: Set[Any], - update: Boolean): Option[BigDecimal] = { + update: Boolean): Option[Double] = { if (!colStatsMap.contains(attr)) { logDebug("[CBO] No statistics for " + attr) return None @@ -398,8 +401,8 @@ case class FilterEstimation(plan: Filter) extends Logging { return Some(0.0) } - val newMax = validQuerySet.maxBy(EstimationUtils.toDecimal(_, dataType)) - val newMin = validQuerySet.minBy(EstimationUtils.toDecimal(_, dataType)) + val newMax = validQuerySet.maxBy(EstimationUtils.toDouble(_, dataType)) + val newMin = validQuerySet.minBy(EstimationUtils.toDouble(_, dataType)) // newNdv should not be greater than the old ndv. For example, column has only 2 values // 1 and 6. The predicate column IN (1, 2, 3, 4, 5). validQuerySet.size is 5. newNdv = ndv.min(BigInt(validQuerySet.size)) @@ -420,7 +423,7 @@ case class FilterEstimation(plan: Filter) extends Logging { // return the filter selectivity. Without advanced statistics such as histograms, // we have to assume uniform distribution. 
- Some((BigDecimal(newNdv) / BigDecimal(ndv)).min(1.0)) + Some(math.min(newNdv.toDouble / ndv.toDouble, 1.0)) } /** @@ -438,21 +441,17 @@ case class FilterEstimation(plan: Filter) extends Logging { op: BinaryComparison, attr: Attribute, literal: Literal, - update: Boolean): Option[BigDecimal] = { + update: Boolean): Option[Double] = { val colStat = colStatsMap(attr) val statsInterval = ValueInterval(colStat.min, colStat.max, attr.dataType).asInstanceOf[NumericValueInterval] - val max = statsInterval.max.toBigDecimal - val min = statsInterval.min.toBigDecimal - val ndv = BigDecimal(colStat.distinctCount) + val max = statsInterval.max + val min = statsInterval.min + val ndv = colStat.distinctCount.toDouble // determine the overlapping degree between predicate interval and column's interval - val numericLiteral = if (literal.dataType == BooleanType) { - if (literal.value.asInstanceOf[Boolean]) BigDecimal(1) else BigDecimal(0) - } else { - BigDecimal(literal.value.toString) - } + val numericLiteral = EstimationUtils.toDouble(literal.value, literal.dataType) val (noOverlap: Boolean, completeOverlap: Boolean) = op match { case _: LessThan => (numericLiteral <= min, numericLiteral > max) @@ -464,63 +463,65 @@ case class FilterEstimation(plan: Filter) extends Logging { (numericLiteral > max, numericLiteral <= min) } - var percent = BigDecimal(1) + var percent = 1.0 if (noOverlap) { percent = 0.0 } else if (completeOverlap) { percent = 1.0 } else { // This is the partial overlap case: - // Without advanced statistics like histogram, we assume uniform data distribution. - // We just prorate the adjusted range over the initial range to compute filter selectivity. - assert(max > min) - percent = op match { - case _: LessThan => - if (numericLiteral == max) { - // If the literal value is right on the boundary, we can minus the part of the - // boundary value (1/ndv). - 1.0 - 1.0 / ndv - } else { - (numericLiteral - min) / (max - min) - } - case _: LessThanOrEqual => - if (numericLiteral == min) { - // The boundary value is the only satisfying value. - 1.0 / ndv - } else { - (numericLiteral - min) / (max - min) - } - case _: GreaterThan => - if (numericLiteral == min) { - 1.0 - 1.0 / ndv - } else { - (max - numericLiteral) / (max - min) - } - case _: GreaterThanOrEqual => - if (numericLiteral == max) { - 1.0 / ndv - } else { - (max - numericLiteral) / (max - min) - } + + if (colStat.histogram.isEmpty) { + // Without advanced statistics like histogram, we assume uniform data distribution. + // We just prorate the adjusted range over the initial range to compute filter selectivity. + assert(max > min) + percent = op match { + case _: LessThan => + if (numericLiteral == max) { + // If the literal value is right on the boundary, we can minus the part of the + // boundary value (1/ndv). + 1.0 - 1.0 / ndv + } else { + (numericLiteral - min) / (max - min) + } + case _: LessThanOrEqual => + if (numericLiteral == min) { + // The boundary value is the only satisfying value. 
+ 1.0 / ndv + } else { + (numericLiteral - min) / (max - min) + } + case _: GreaterThan => + if (numericLiteral == min) { + 1.0 - 1.0 / ndv + } else { + (max - numericLiteral) / (max - min) + } + case _: GreaterThanOrEqual => + if (numericLiteral == max) { + 1.0 / ndv + } else { + (max - numericLiteral) / (max - min) + } + } + } else { + percent = computeComparisonPossibilityByHistogram(op, literal, colStat) } if (update) { val newValue = Some(literal.value) var newMax = colStat.max var newMin = colStat.min - var newNdv = ceil(ndv * percent) - if (newNdv < 1) newNdv = 1 op match { case _: GreaterThan | _: GreaterThanOrEqual => - // If new ndv is 1, then new max must be equal to new min. - newMin = if (newNdv == 1) newMax else newValue + newMin = newValue case _: LessThan | _: LessThanOrEqual => - newMax = if (newNdv == 1) newMin else newValue + newMax = newValue } - val newStats = - colStat.copy(distinctCount = newNdv, min = newMin, max = newMax, nullCount = 0) + val newStats = colStat.copy(distinctCount = ceil(ndv * percent), + min = newMin, max = newMax, nullCount = 0) colStatsMap.update(attr, newStats) } @@ -529,6 +530,93 @@ case class FilterEstimation(plan: Filter) extends Logging { Some(percent) } + /** + * Computes the possibility of an equality predicate using histogram. + */ + private def computeEqualityPossibilityByHistogram( + literal: Literal, colStat: ColumnStat): Double = { + val datum = EstimationUtils.toDouble(literal.value, literal.dataType) + val histogram = colStat.histogram.get + + // find bins where column's current min and max locate. Note that a column's [min, max] + // range may change due to another condition applied earlier. + val min = EstimationUtils.toDouble(colStat.min.get, literal.dataType) + val max = EstimationUtils.toDouble(colStat.max.get, literal.dataType) + + // compute how many bins the column's current valid range [min, max] occupies. + val numBinsHoldingEntireRange = EstimationUtils.numBinsHoldingRange( + upperBound = max, + upperBoundInclusive = true, + lowerBound = min, + lowerBoundInclusive = true, + histogram.bins) + + val numBinsHoldingDatum = EstimationUtils.numBinsHoldingRange( + upperBound = datum, + upperBoundInclusive = true, + lowerBound = datum, + lowerBoundInclusive = true, + histogram.bins) + + numBinsHoldingDatum / numBinsHoldingEntireRange + } + + /** + * Computes the possibility of a comparison predicate using histogram. + */ + private def computeComparisonPossibilityByHistogram( + op: BinaryComparison, literal: Literal, colStat: ColumnStat): Double = { + val datum = EstimationUtils.toDouble(literal.value, literal.dataType) + val histogram = colStat.histogram.get + + // find bins where column's current min and max locate. Note that a column's [min, max] + // range may change due to another condition applied earlier. + val min = EstimationUtils.toDouble(colStat.min.get, literal.dataType) + val max = EstimationUtils.toDouble(colStat.max.get, literal.dataType) + + // compute how many bins the column's current valid range [min, max] occupies. + val numBinsHoldingEntireRange = EstimationUtils.numBinsHoldingRange( + max, upperBoundInclusive = true, min, lowerBoundInclusive = true, histogram.bins) + + val numBinsHoldingRange = op match { + // LessThan and LessThanOrEqual share the same logic, the only difference is whether to + // include the upperBound in the range. 
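+      // For example (illustrative numbers only): with a 4-bin equi-height histogram over
+      // [0, 100] and `col < 30`, the range covers 1.2 bins (all of bin [0, 25] plus
+      // (30 - 25) / (50 - 25) = 0.2 of bin [25, 50]), so the selectivity is 1.2 / 4 = 0.3.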
+ case _: LessThan => + EstimationUtils.numBinsHoldingRange( + upperBound = datum, + upperBoundInclusive = false, + lowerBound = min, + lowerBoundInclusive = true, + histogram.bins) + case _: LessThanOrEqual => + EstimationUtils.numBinsHoldingRange( + upperBound = datum, + upperBoundInclusive = true, + lowerBound = min, + lowerBoundInclusive = true, + histogram.bins) + + // GreaterThan and GreaterThanOrEqual share the same logic, the only difference is whether to + // include the lowerBound in the range. + case _: GreaterThan => + EstimationUtils.numBinsHoldingRange( + upperBound = max, + upperBoundInclusive = true, + lowerBound = datum, + lowerBoundInclusive = false, + histogram.bins) + case _: GreaterThanOrEqual => + EstimationUtils.numBinsHoldingRange( + upperBound = max, + upperBoundInclusive = true, + lowerBound = datum, + lowerBoundInclusive = true, + histogram.bins) + } + + numBinsHoldingRange / numBinsHoldingEntireRange + } + /** * Returns a percentage of rows meeting a binary comparison expression containing two columns. * In SQL queries, we also see predicate expressions involving two columns @@ -547,7 +635,7 @@ case class FilterEstimation(plan: Filter) extends Logging { op: BinaryComparison, attrLeft: Attribute, attrRight: Attribute, - update: Boolean): Option[BigDecimal] = { + update: Boolean): Option[Double] = { if (!colStatsMap.contains(attrLeft)) { logDebug("[CBO] No statistics for " + attrLeft) @@ -630,7 +718,7 @@ case class FilterEstimation(plan: Filter) extends Logging { ) } - var percent = BigDecimal(1) + var percent = 1.0 if (noOverlap) { percent = 0.0 } else if (completeOverlap) { @@ -644,11 +732,9 @@ case class FilterEstimation(plan: Filter) extends Logging { // Need to adjust new min/max after the filter condition is applied val ndvLeft = BigDecimal(colStatLeft.distinctCount) - var newNdvLeft = ceil(ndvLeft * percent) - if (newNdvLeft < 1) newNdvLeft = 1 + val newNdvLeft = ceil(ndvLeft * percent) val ndvRight = BigDecimal(colStatRight.distinctCount) - var newNdvRight = ceil(ndvRight * percent) - if (newNdvRight < 1) newNdvRight = 1 + val newNdvRight = ceil(ndvRight * percent) var newMaxLeft = colStatLeft.max var newMinLeft = colStatLeft.min @@ -784,11 +870,16 @@ case class ColumnStatsMap(originalMap: AttributeMap[ColumnStat]) { def outputColumnStats(rowsBeforeFilter: BigInt, rowsAfterFilter: BigInt) : AttributeMap[ColumnStat] = { val newColumnStats = originalMap.map { case (attr, oriColStat) => - // Update ndv based on the overall filter selectivity: scale down ndv if the number of rows - // decreases; otherwise keep it unchanged. - val newNdv = EstimationUtils.updateNdv(oldNumRows = rowsBeforeFilter, - newNumRows = rowsAfterFilter, oldNdv = oriColStat.distinctCount) val colStat = updatedMap.get(attr.exprId).map(_._2).getOrElse(oriColStat) + val newNdv = if (colStat.distinctCount > 1) { + // Update ndv based on the overall filter selectivity: scale down ndv if the number of rows + // decreases; otherwise keep it unchanged. 
+        EstimationUtils.updateNdv(oldNumRows = rowsBeforeFilter,
+          newNumRows = rowsAfterFilter, oldNdv = oriColStat.distinctCount)
+      } else {
+        // no need to scale down since it is already down to 1 (for the skewed distribution case)
+        colStat.distinctCount
+      }
       attr -> colStat.copy(distinctCount = newNdv)
     }
     AttributeMap(newColumnStats.toSeq)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala
index b073108c26ee5..f0294a4246703 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala
@@ -24,7 +24,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference, Expression}
 import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys
 import org.apache.spark.sql.catalyst.plans._
-import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, Join, Statistics}
+import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, Histogram, Join, Statistics}
 import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils._
@@ -191,8 +191,19 @@ case class JoinEstimation(join: Join) extends Logging {
       val rInterval = ValueInterval(rightKeyStat.min, rightKeyStat.max, rightKey.dataType)
       if (ValueInterval.isIntersected(lInterval, rInterval)) {
         val (newMin, newMax) = ValueInterval.intersect(lInterval, rInterval, leftKey.dataType)
-        val (card, joinStat) = computeByNdv(leftKey, rightKey, newMin, newMax)
-        keyStatsAfterJoin += (leftKey -> joinStat, rightKey -> joinStat)
+        val (card, joinStat) = (leftKeyStat.histogram, rightKeyStat.histogram) match {
+          case (Some(l: Histogram), Some(r: Histogram)) =>
+            computeByHistogram(leftKey, rightKey, l, r, newMin, newMax)
+          case _ =>
+            computeByNdv(leftKey, rightKey, newMin, newMax)
+        }
+        keyStatsAfterJoin += (
+          // Histograms are propagated unchanged. During future estimation they should be
+          // truncated by the updated max/min. This way, only pointers to the histograms are
+          // propagated, which reduces memory consumption.
+          leftKey -> joinStat.copy(histogram = leftKeyStat.histogram),
+          rightKey -> joinStat.copy(histogram = rightKeyStat.histogram)
+        )
         // Return cardinality estimated from the most selective join keys.
         if (card < joinCard) joinCard = card
       } else {
@@ -225,6 +236,43 @@ case class JoinEstimation(join: Join) extends Logging {
     (ceil(card), newStats)
   }

+  /** Compute join cardinality using equi-height histograms. */
+  private def computeByHistogram(
+      leftKey: AttributeReference,
+      rightKey: AttributeReference,
+      leftHistogram: Histogram,
+      rightHistogram: Histogram,
+      newMin: Option[Any],
+      newMax: Option[Any]): (BigInt, ColumnStat) = {
+    val overlappedRanges = getOverlappedRanges(
+      leftHistogram = leftHistogram,
+      rightHistogram = rightHistogram,
+      // Only numeric values have equi-height histograms.
+ lowerBound = newMin.get.toString.toDouble, + upperBound = newMax.get.toString.toDouble) + + var card: BigDecimal = 0 + var totalNdv: Double = 0 + for (i <- overlappedRanges.indices) { + val range = overlappedRanges(i) + if (i == 0 || range.hi != overlappedRanges(i - 1).hi) { + // If range.hi == overlappedRanges(i - 1).hi, that means the current range has only one + // value, and this value is already counted in the previous range. So there is no need to + // count it in this range. + totalNdv += math.min(range.leftNdv, range.rightNdv) + } + // Apply the formula in this overlapped range. + card += range.leftNumRows * range.rightNumRows / math.max(range.leftNdv, range.rightNdv) + } + + val leftKeyStat = leftStats.attributeStats(leftKey) + val rightKeyStat = rightStats.attributeStats(rightKey) + val newMaxLen = math.min(leftKeyStat.maxLen, rightKeyStat.maxLen) + val newAvgLen = (leftKeyStat.avgLen + rightKeyStat.avgLen) / 2 + val newStats = ColumnStat(ceil(totalNdv), newMin, newMax, 0, newAvgLen, newMaxLen) + (ceil(card), newStats) + } + /** * Propagate or update column stats for output attributes. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ValueInterval.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ValueInterval.scala index 0caaf796a3b68..f46b4ed764e27 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ValueInterval.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ValueInterval.scala @@ -26,10 +26,10 @@ trait ValueInterval { def contains(l: Literal): Boolean } -/** For simplicity we use decimal to unify operations of numeric intervals. */ -case class NumericValueInterval(min: Decimal, max: Decimal) extends ValueInterval { +/** For simplicity we use double to unify operations of numeric intervals. */ +case class NumericValueInterval(min: Double, max: Double) extends ValueInterval { override def contains(l: Literal): Boolean = { - val lit = EstimationUtils.toDecimal(l.value, l.dataType) + val lit = EstimationUtils.toDouble(l.value, l.dataType) min <= lit && max >= lit } } @@ -56,8 +56,8 @@ object ValueInterval { case _ if min.isEmpty || max.isEmpty => new NullValueInterval() case _ => NumericValueInterval( - min = EstimationUtils.toDecimal(min.get, dataType), - max = EstimationUtils.toDecimal(max.get, dataType)) + min = EstimationUtils.toDouble(min.get, dataType), + max = EstimationUtils.toDouble(max.get, dataType)) } def isIntersected(r1: ValueInterval, r2: ValueInterval): Boolean = (r1, r2) match { @@ -84,8 +84,8 @@ object ValueInterval { // Choose the maximum of two min values, and the minimum of two max values. 
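(Aside: a standalone sketch of the per-range cardinality formula used in `computeByHistogram` above; the numbers are illustrative only.)

object JoinCardinalitySketch {
  def main(args: Array[String]): Unit = {
    // One overlapped range: 300 left rows over 30 distinct values joined with
    // 500 right rows over 50 distinct values.
    val (leftNumRows, leftNdv) = (300.0, 30.0)
    val (rightNumRows, rightNdv) = (500.0, 50.0)
    // rows = leftNumRows * rightNumRows / max(leftNdv, rightNdv), summed over all ranges.
    val card = leftNumRows * rightNumRows / math.max(leftNdv, rightNdv)
    println(card) // 3000.0 estimated output rows contributed by this range
  }
}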
val newMin = if (n1.min <= n2.min) n2.min else n1.min val newMax = if (n1.max <= n2.max) n1.max else n2.max - (Some(EstimationUtils.fromDecimal(newMin, dt)), - Some(EstimationUtils.fromDecimal(newMax, dt))) + (Some(EstimationUtils.fromDouble(newMin, dt)), + Some(EstimationUtils.fromDouble(newMax, dt))) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala index e57c842ce2a36..0189bd73c56bf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala @@ -30,18 +30,43 @@ import org.apache.spark.sql.types.{DataType, IntegerType} * - Intra-partition ordering of data: In this case the distribution describes guarantees made * about how tuples are distributed within a single partition. */ -sealed trait Distribution +sealed trait Distribution { + /** + * The required number of partitions for this distribution. If it's None, then any number of + * partitions is allowed for this distribution. + */ + def requiredNumPartitions: Option[Int] + + /** + * Creates a default partitioning for this distribution, which can satisfy this distribution while + * matching the given number of partitions. + */ + def createPartitioning(numPartitions: Int): Partitioning +} /** * Represents a distribution where no promises are made about co-location of data. */ -case object UnspecifiedDistribution extends Distribution +case object UnspecifiedDistribution extends Distribution { + override def requiredNumPartitions: Option[Int] = None + + override def createPartitioning(numPartitions: Int): Partitioning = { + throw new IllegalStateException("UnspecifiedDistribution does not have default partitioning.") + } +} /** * Represents a distribution that only has a single partition and all tuples of the dataset * are co-located. */ -case object AllTuples extends Distribution +case object AllTuples extends Distribution { + override def requiredNumPartitions: Option[Int] = Some(1) + + override def createPartitioning(numPartitions: Int): Partitioning = { + assert(numPartitions == 1, "The default partitioning of AllTuples can only have 1 partition.") + SinglePartition + } +} /** * Represents data where tuples that share the same values for the `clustering` @@ -51,12 +76,41 @@ case object AllTuples extends Distribution */ case class ClusteredDistribution( clustering: Seq[Expression], - numPartitions: Option[Int] = None) extends Distribution { + requiredNumPartitions: Option[Int] = None) extends Distribution { require( clustering != Nil, "The clustering expressions of a ClusteredDistribution should not be Nil. " + "An AllTuples should be used to represent a distribution that only has " + "a single partition.") + + override def createPartitioning(numPartitions: Int): Partitioning = { + assert(requiredNumPartitions.isEmpty || requiredNumPartitions.get == numPartitions, + s"This ClusteredDistribution requires ${requiredNumPartitions.get} partitions, but " + + s"the actual number of partitions is $numPartitions.") + HashPartitioning(clustering, numPartitions) + } +} + +/** + * Represents data where tuples have been clustered according to the hash of the given + * `expressions`. The hash function is defined as `HashPartitioning.partitionIdExpression`, so only + * [[HashPartitioning]] can satisfy this distribution. 
+ *
+ * This is a strictly stronger guarantee than [[ClusteredDistribution]]. Given a tuple and the
+ * number of partitions, this distribution strictly determines which partition the tuple should
+ * be in.
+ */
+case class HashClusteredDistribution(expressions: Seq[Expression]) extends Distribution {
+  require(
+    expressions != Nil,
+    "The expressions for hash of a HashClusteredDistribution should not be Nil. " +
+      "An AllTuples should be used to represent a distribution that only has " +
+      "a single partition.")
+
+  override def requiredNumPartitions: Option[Int] = None
+
+  override def createPartitioning(numPartitions: Int): Partitioning = {
+    HashPartitioning(expressions, numPartitions)
+  }
 }

 /**
@@ -73,46 +127,31 @@ case class OrderedDistribution(ordering: Seq[SortOrder]) extends Distribution {
     "An AllTuples should be used to represent a distribution that only has " +
     "a single partition.")

-  // TODO: This is not really valid...
-  def clustering: Set[Expression] = ordering.map(_.child).toSet
+  override def requiredNumPartitions: Option[Int] = None
+
+  override def createPartitioning(numPartitions: Int): Partitioning = {
+    RangePartitioning(ordering, numPartitions)
+  }
 }

 /**
  * Represents data where tuples are broadcasted to every node. It is quite common that the
  * entire set of tuples is transformed into different data structure.
  */
-case class BroadcastDistribution(mode: BroadcastMode) extends Distribution
+case class BroadcastDistribution(mode: BroadcastMode) extends Distribution {
+  override def requiredNumPartitions: Option[Int] = Some(1)
+
+  override def createPartitioning(numPartitions: Int): Partitioning = {
+    assert(numPartitions == 1,
+      "The default partitioning of BroadcastDistribution can only have 1 partition.")
+    BroadcastPartitioning(mode)
+  }
+}

 /**
- * Describes how an operator's output is split across partitions. The `compatibleWith`,
- * `guarantees`, and `satisfies` methods describe relationships between child partitionings,
- * target partitionings, and [[Distribution]]s. These relations are described more precisely in
- * their individual method docs, but at a high level:
- *
- *  - `satisfies` is a relationship between partitionings and distributions.
- *  - `compatibleWith` is relationships between an operator's child output partitionings.
- *  - `guarantees` is a relationship between a child's existing output partitioning and a target
- *    output partitioning.
- *
- * Diagrammatically:
- *
- *     +--------------+
- *     | Distribution |
- *     +--------------+
- *             ^
- *             |
- *         satisfies
- *             |
- *     +--------------+                  +--------------+
- *     |    Child     |                  |    Target    |
- *  +----| Partitioning |----guarantees--->| Partitioning |
- *  |  +--------------+                  +--------------+
- *  |           ^
- *  |           |
- *  |     compatibleWith
- *  |           |
- *  +------------+
- *
+ * Describes how an operator's output is split across partitions. It has two major properties:
+ *   1. the number of partitions.
+ *   2. whether it can satisfy a given distribution.
  */
 sealed trait Partitioning {
   /** Returns the number of partitions that the data is split across */
@@ -123,113 +162,35 @@ sealed trait Partitioning {
    * to satisfy the partitioning scheme mandated by the `required` [[Distribution]],
    * i.e. the current dataset does not need to be re-partitioned for the `required`
    * Distribution (it is possible that tuples within a partition need to be reorganized).
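(Aside: a simplified standalone sketch of why hash-clustering is stricter than clustering; the two functions below are illustrative stand-ins, not the real API.)

object DistributionSketch {
  // ClusteredDistribution: the partitioning expressions must be a subset of the clustering.
  def satisfiesClustered(partExprs: Seq[String], distExprs: Seq[String]): Boolean =
    partExprs.forall(distExprs.contains)

  // HashClusteredDistribution: the expressions must match pairwise, in order.
  def satisfiesHashClustered(partExprs: Seq[String], distExprs: Seq[String]): Boolean =
    partExprs.length == distExprs.length &&
      partExprs.zip(distExprs).forall { case (l, r) => l == r }

  def main(args: Array[String]): Unit = {
    println(satisfiesClustered(Seq("a", "b"), Seq("b", "a")))     // true
    println(satisfiesHashClustered(Seq("a", "b"), Seq("b", "a"))) // false: order matters
  }
}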
-   */
-  def satisfies(required: Distribution): Boolean
-
-  /**
-   * Returns true iff we can say that the partitioning scheme of this [[Partitioning]]
-   * guarantees the same partitioning scheme described by `other`.
-   *
-   * Compatibility of partitionings is only checked for operators that have multiple children
-   * and that require a specific child output [[Distribution]], such as joins.
-   *
-   * Intuitively, partitionings are compatible if they route the same partitioning key to the same
-   * partition. For instance, two hash partitionings are only compatible if they produce the same
-   * number of output partitionings and hash records according to the same hash function and
-   * same partitioning key schema.
-   *
-   * Put another way, two partitionings are compatible with each other if they satisfy all of the
-   * same distribution guarantees.
-   */
-  def compatibleWith(other: Partitioning): Boolean
-
-  /**
-   * Returns true iff we can say that the partitioning scheme of this [[Partitioning]] guarantees
-   * the same partitioning scheme described by `other`. If a `A.guarantees(B)`, then repartitioning
-   * the child's output according to `B` will be unnecessary. `guarantees` is used as a performance
-   * optimization to allow the exchange planner to avoid redundant repartitionings. By default,
-   * a partitioning only guarantees partitionings that are equal to itself (i.e. the same number
-   * of partitions, same strategy (range or hash), etc).
-   *
-   * In order to enable more aggressive optimization, this strict equality check can be relaxed.
-   * For example, say that the planner needs to repartition all of an operator's children so that
-   * they satisfy the [[AllTuples]] distribution. One way to do this is to repartition all children
-   * to have the [[SinglePartition]] partitioning. If one of the operator's children already happens
-   * to be hash-partitioned with a single partition then we do not need to re-shuffle this child;
-   * this repartitioning can be avoided if a single-partition [[HashPartitioning]] `guarantees`
-   * [[SinglePartition]].
-   *
-   * The SinglePartition example given above is not particularly interesting; guarantees' real
-   * value occurs for more advanced partitioning strategies. SPARK-7871 will introduce a notion
-   * of null-safe partitionings, under which partitionings can specify whether rows whose
-   * partitioning keys contain null values will be grouped into the same partition or whether they
-   * will have an unknown / random distribution. If a partitioning does not require nulls to be
-   * clustered then a partitioning which _does_ cluster nulls will guarantee the null clustered
-   * partitioning. The converse is not true, however: a partitioning which clusters nulls cannot
-   * be guaranteed by one which does not cluster them. Thus, in general `guarantees` is not a
-   * symmetric relation.
    *
-   * Another way to think about `guarantees`: if `A.guarantees(B)`, then any partitioning of rows
-   * produced by `A` could have also been produced by `B`.
+   * By default a [[Partitioning]] can satisfy [[UnspecifiedDistribution]], and [[AllTuples]] if
+   * the [[Partitioning]] only has one partition. Implementations can override this method with
+   * special logic.
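+   * For example, any single-partition [[Partitioning]], such as a one-partition
+   * [[HashPartitioning]], satisfies [[AllTuples]] through this default.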
*/ - def guarantees(other: Partitioning): Boolean = this == other -} - -object Partitioning { - def allCompatible(partitionings: Seq[Partitioning]): Boolean = { - // Note: this assumes transitivity - partitionings.sliding(2).map { - case Seq(a) => true - case Seq(a, b) => - if (a.numPartitions != b.numPartitions) { - assert(!a.compatibleWith(b) && !b.compatibleWith(a)) - false - } else { - a.compatibleWith(b) && b.compatibleWith(a) - } - }.forall(_ == true) - } -} - -case class UnknownPartitioning(numPartitions: Int) extends Partitioning { - override def satisfies(required: Distribution): Boolean = required match { + def satisfies(required: Distribution): Boolean = required match { case UnspecifiedDistribution => true + case AllTuples => numPartitions == 1 case _ => false } - - override def compatibleWith(other: Partitioning): Boolean = false - - override def guarantees(other: Partitioning): Boolean = false } +case class UnknownPartitioning(numPartitions: Int) extends Partitioning + /** * Represents a partitioning where rows are distributed evenly across output partitions * by starting from a random target partition number and distributing rows in a round-robin * fashion. This partitioning is used when implementing the DataFrame.repartition() operator. */ -case class RoundRobinPartitioning(numPartitions: Int) extends Partitioning { - override def satisfies(required: Distribution): Boolean = required match { - case UnspecifiedDistribution => true - case _ => false - } - - override def compatibleWith(other: Partitioning): Boolean = false - - override def guarantees(other: Partitioning): Boolean = false -} +case class RoundRobinPartitioning(numPartitions: Int) extends Partitioning case object SinglePartition extends Partitioning { val numPartitions = 1 override def satisfies(required: Distribution): Boolean = required match { case _: BroadcastDistribution => false - case ClusteredDistribution(_, desiredPartitions) => desiredPartitions.forall(_ == 1) + case ClusteredDistribution(_, Some(requiredNumPartitions)) => requiredNumPartitions == 1 case _ => true } - - override def compatibleWith(other: Partitioning): Boolean = other.numPartitions == 1 - - override def guarantees(other: Partitioning): Boolean = other.numPartitions == 1 } /** @@ -244,22 +205,19 @@ case class HashPartitioning(expressions: Seq[Expression], numPartitions: Int) override def nullable: Boolean = false override def dataType: DataType = IntegerType - override def satisfies(required: Distribution): Boolean = required match { - case UnspecifiedDistribution => true - case ClusteredDistribution(requiredClustering, desiredPartitions) => - expressions.forall(x => requiredClustering.exists(_.semanticEquals(x))) && - desiredPartitions.forall(_ == numPartitions) // if desiredPartitions = None, returns true - case _ => false - } - - override def compatibleWith(other: Partitioning): Boolean = other match { - case o: HashPartitioning => this.semanticEquals(o) - case _ => false - } - - override def guarantees(other: Partitioning): Boolean = other match { - case o: HashPartitioning => this.semanticEquals(o) - case _ => false + override def satisfies(required: Distribution): Boolean = { + super.satisfies(required) || { + required match { + case h: HashClusteredDistribution => + expressions.length == h.expressions.length && expressions.zip(h.expressions).forall { + case (l, r) => l.semanticEquals(r) + } + case ClusteredDistribution(requiredClustering, requiredNumPartitions) => + expressions.forall(x => 
requiredClustering.exists(_.semanticEquals(x))) && + (requiredNumPartitions.isEmpty || requiredNumPartitions.get == numPartitions) + case _ => false + } + } } /** @@ -288,25 +246,18 @@ case class RangePartitioning(ordering: Seq[SortOrder], numPartitions: Int) override def nullable: Boolean = false override def dataType: DataType = IntegerType - override def satisfies(required: Distribution): Boolean = required match { - case UnspecifiedDistribution => true - case OrderedDistribution(requiredOrdering) => - val minSize = Seq(requiredOrdering.size, ordering.size).min - requiredOrdering.take(minSize) == ordering.take(minSize) - case ClusteredDistribution(requiredClustering, desiredPartitions) => - ordering.map(_.child).forall(x => requiredClustering.exists(_.semanticEquals(x))) && - desiredPartitions.forall(_ == numPartitions) // if desiredPartitions = None, returns true - case _ => false - } - - override def compatibleWith(other: Partitioning): Boolean = other match { - case o: RangePartitioning => this.semanticEquals(o) - case _ => false - } - - override def guarantees(other: Partitioning): Boolean = other match { - case o: RangePartitioning => this.semanticEquals(o) - case _ => false + override def satisfies(required: Distribution): Boolean = { + super.satisfies(required) || { + required match { + case OrderedDistribution(requiredOrdering) => + val minSize = Seq(requiredOrdering.size, ordering.size).min + requiredOrdering.take(minSize) == ordering.take(minSize) + case ClusteredDistribution(requiredClustering, requiredNumPartitions) => + ordering.map(_.child).forall(x => requiredClustering.exists(_.semanticEquals(x))) && + (requiredNumPartitions.isEmpty || requiredNumPartitions.get == numPartitions) + case _ => false + } + } } } @@ -347,20 +298,6 @@ case class PartitioningCollection(partitionings: Seq[Partitioning]) override def satisfies(required: Distribution): Boolean = partitionings.exists(_.satisfies(required)) - /** - * Returns true if any `partitioning` of this collection is compatible with - * the given [[Partitioning]]. - */ - override def compatibleWith(other: Partitioning): Boolean = - partitionings.exists(_.compatibleWith(other)) - - /** - * Returns true if any `partitioning` of this collection guarantees - * the given [[Partitioning]]. 
- */ - override def guarantees(other: Partitioning): Boolean = - partitionings.exists(_.guarantees(other)) - override def toString: String = { partitionings.map(_.toString).mkString("(", " or ", ")") } @@ -377,9 +314,4 @@ case class BroadcastPartitioning(mode: BroadcastMode) extends Partitioning { case BroadcastDistribution(m) if m == mode => true case _ => false } - - override def compatibleWith(other: Partitioning): Boolean = other match { - case BroadcastPartitioning(m) if m == mode => true - case _ => false - } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 746c3e8950f7b..fa69b8af62c85 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -45,7 +45,8 @@ object DateTimeUtils { // it's 2440587.5, rounding up to compatible with Hive final val JULIAN_DAY_OF_EPOCH = 2440588 final val SECONDS_PER_DAY = 60 * 60 * 24L - final val MICROS_PER_SECOND = 1000L * 1000L + final val MICROS_PER_MILLIS = 1000L + final val MICROS_PER_SECOND = MICROS_PER_MILLIS * MILLIS_PER_SECOND final val MILLIS_PER_SECOND = 1000L final val NANOS_PER_SECOND = MICROS_PER_SECOND * 1000L final val MICROS_PER_DAY = MICROS_PER_SECOND * SECONDS_PER_DAY @@ -61,6 +62,7 @@ object DateTimeUtils { final val YearZero = -17999 final val toYearZero = to2001 + 7304850 final val TimeZoneGMT = TimeZone.getTimeZone("GMT") + final val TimeZoneUTC = TimeZone.getTimeZone("UTC") final val MonthOf31Days = Set(1, 3, 5, 7, 8, 10, 12) val TIMEZONE_OPTION = "timeZone" @@ -908,6 +910,15 @@ object DateTimeUtils { math.round(diff * 1e8) / 1e8 } + // Thursday = 0 since 1970/Jan/01 => Thursday + private val SUNDAY = 3 + private val MONDAY = 4 + private val TUESDAY = 5 + private val WEDNESDAY = 6 + private val THURSDAY = 0 + private val FRIDAY = 1 + private val SATURDAY = 2 + /* * Returns day of week from String. Starting from Thursday, marked as 0. * (Because 1970-01-01 is Thursday). @@ -915,13 +926,13 @@ object DateTimeUtils { def getDayOfWeekFromString(string: UTF8String): Int = { val dowString = string.toString.toUpperCase(Locale.ROOT) dowString match { - case "SU" | "SUN" | "SUNDAY" => 3 - case "MO" | "MON" | "MONDAY" => 4 - case "TU" | "TUE" | "TUESDAY" => 5 - case "WE" | "WED" | "WEDNESDAY" => 6 - case "TH" | "THU" | "THURSDAY" => 0 - case "FR" | "FRI" | "FRIDAY" => 1 - case "SA" | "SAT" | "SATURDAY" => 2 + case "SU" | "SUN" | "SUNDAY" => SUNDAY + case "MO" | "MON" | "MONDAY" => MONDAY + case "TU" | "TUE" | "TUESDAY" => TUESDAY + case "WE" | "WED" | "WEDNESDAY" => WEDNESDAY + case "TH" | "THU" | "THURSDAY" => THURSDAY + case "FR" | "FRI" | "FRIDAY" => FRIDAY + case "SA" | "SAT" | "SATURDAY" => SATURDAY case _ => -1 } } @@ -943,9 +954,16 @@ object DateTimeUtils { date + daysToMonthEnd } - private val TRUNC_TO_YEAR = 1 - private val TRUNC_TO_MONTH = 2 - private val TRUNC_INVALID = -1 + // Visible for testing. + private[sql] val TRUNC_TO_YEAR = 1 + private[sql] val TRUNC_TO_MONTH = 2 + private[sql] val TRUNC_TO_QUARTER = 3 + private[sql] val TRUNC_TO_WEEK = 4 + private[sql] val TRUNC_TO_DAY = 5 + private[sql] val TRUNC_TO_HOUR = 6 + private[sql] val TRUNC_TO_MINUTE = 7 + private[sql] val TRUNC_TO_SECOND = 8 + private[sql] val TRUNC_INVALID = -1 /** * Returns the trunc date from original date and trunc level. 
@@ -963,7 +981,62 @@
   }

   /**
-   * Returns the truncate level, could be TRUNC_YEAR, TRUNC_MONTH, or TRUNC_INVALID,
+   * Returns the truncated date time from the original date time and a trunc level.
+   * The trunc level should be generated using `parseTruncLevel()` and should be between 1 and 8.
+   */
+  def truncTimestamp(t: SQLTimestamp, level: Int, timeZone: TimeZone): SQLTimestamp = {
+    var millis = t / MICROS_PER_MILLIS
+    val truncated = level match {
+      case TRUNC_TO_YEAR =>
+        val dDays = millisToDays(millis, timeZone)
+        daysToMillis(truncDate(dDays, level), timeZone)
+      case TRUNC_TO_MONTH =>
+        val dDays = millisToDays(millis, timeZone)
+        daysToMillis(truncDate(dDays, level), timeZone)
+      case TRUNC_TO_DAY =>
+        val offset = timeZone.getOffset(millis)
+        millis += offset
+        millis - millis % (MILLIS_PER_SECOND * SECONDS_PER_DAY) - offset
+      case TRUNC_TO_HOUR =>
+        val offset = timeZone.getOffset(millis)
+        millis += offset
+        millis - millis % (60 * 60 * MILLIS_PER_SECOND) - offset
+      case TRUNC_TO_MINUTE =>
+        millis - millis % (60 * MILLIS_PER_SECOND)
+      case TRUNC_TO_SECOND =>
+        millis - millis % MILLIS_PER_SECOND
+      case TRUNC_TO_WEEK =>
+        val dDays = millisToDays(millis, timeZone)
+        val prevMonday = getNextDateForDayOfWeek(dDays - 7, MONDAY)
+        daysToMillis(prevMonday, timeZone)
+      case TRUNC_TO_QUARTER =>
+        val dDays = millisToDays(millis, timeZone)
+        millis = daysToMillis(truncDate(dDays, TRUNC_TO_MONTH), timeZone)
+        val cal = Calendar.getInstance()
+        cal.setTimeInMillis(millis)
+        val quarter = getQuarter(dDays)
+        val month = quarter match {
+          case 1 => Calendar.JANUARY
+          case 2 => Calendar.APRIL
+          case 3 => Calendar.JULY
+          case 4 => Calendar.OCTOBER
+        }
+        cal.set(Calendar.MONTH, month)
+        cal.getTimeInMillis()
+      case _ =>
+        // Callers make sure that this branch is never reached.
+        sys.error(s"Invalid trunc level: $level")
+    }
+    truncated * MICROS_PER_MILLIS
+  }
+
+  def truncTimestamp(d: SQLTimestamp, level: Int): SQLTimestamp = {
+    truncTimestamp(d, level, defaultTimeZone())
+  }
+
+  /**
+   * Returns the truncate level, which could be TRUNC_TO_YEAR, TRUNC_TO_MONTH, TRUNC_TO_DAY,
+   * TRUNC_TO_HOUR, TRUNC_TO_MINUTE, TRUNC_TO_SECOND, TRUNC_TO_WEEK, TRUNC_TO_QUARTER or TRUNC_INVALID,
    * TRUNC_INVALID means unsupported truncate level.
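(Aside: a standalone sketch of the minute-level truncation arithmetic in `truncTimestamp` above; below the hour level no timezone offset is involved.)

object TruncTimestampSketch {
  val MILLIS_PER_SECOND = 1000L

  def main(args: Array[String]): Unit = {
    val millis = 1512345678901L // arbitrary epoch millis
    val truncated = millis - millis % (60 * MILLIS_PER_SECOND)
    println(truncated) // 1512345660000: seconds and milliseconds zeroed out
  }
}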
*/ def parseTruncLevel(format: UTF8String): Int = { @@ -973,6 +1046,12 @@ object DateTimeUtils { format.toString.toUpperCase(Locale.ROOT) match { case "YEAR" | "YYYY" | "YY" => TRUNC_TO_YEAR case "MON" | "MONTH" | "MM" => TRUNC_TO_MONTH + case "DAY" | "DD" => TRUNC_TO_DAY + case "HOUR" => TRUNC_TO_HOUR + case "MINUTE" => TRUNC_TO_MINUTE + case "SECOND" => TRUNC_TO_SECOND + case "WEEK" => TRUNC_TO_WEEK + case "QUARTER" => TRUNC_TO_QUARTER case _ => TRUNC_INVALID } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala index eb7941cf9e6af..b013add9c9778 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala @@ -105,7 +105,7 @@ class QuantileSummaries( if (newSamples.isEmpty || (sampleIdx == sampled.length && opsIdx == sorted.length - 1)) { 0 } else { - math.floor(2 * relativeError * currentCount).toInt + math.floor(2 * relativeError * currentCount).toLong } val tuple = Stats(currentSample, 1, delta) @@ -192,10 +192,10 @@ class QuantileSummaries( } // Target rank - val rank = math.ceil(quantile * count).toInt + val rank = math.ceil(quantile * count).toLong val targetError = relativeError * count // Minimum rank at current sample - var minRank = 0 + var minRank = 0L var i = 0 while (i < sampled.length - 1) { val curSample = sampled(i) @@ -235,7 +235,7 @@ object QuantileSummaries { * @param g the minimum rank jump from the previous value's minimum rank * @param delta the maximum span of the rank. */ - case class Stats(value: Double, g: Int, delta: Int) + case class Stats(value: Double, g: Long, delta: Long) private def compressImmut( currentSamples: IndexedSeq[Stats], diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 551cad70b3919..39387349755a5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -23,14 +23,17 @@ import java.util.concurrent.atomic.AtomicReference import scala.collection.JavaConverters._ import scala.collection.immutable +import scala.util.matching.Regex import org.apache.hadoop.fs.Path +import org.apache.spark.{SparkContext, SparkEnv} import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ import org.apache.spark.network.util.ByteUnit import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator +import org.apache.spark.util.Utils //////////////////////////////////////////////////////////////////////////////////////////////////// // This file defines the configuration options for Spark SQL. @@ -69,7 +72,7 @@ object SQLConf { * Default config. Only used when there is no active SparkSession for the thread. * See [[get]] for more information. */ - private val fallbackConf = new ThreadLocal[SQLConf] { + private lazy val fallbackConf = new ThreadLocal[SQLConf] { override def initialValue: SQLConf = new SQLConf } @@ -246,7 +249,7 @@ object SQLConf { val CONSTRAINT_PROPAGATION_ENABLED = buildConf("spark.sql.constraintPropagation.enabled") .internal() .doc("When true, the query optimizer will infer and propagate data constraints in the query " + - "plan to optimize them. 
-      "plan to optimize them. Constraint propagation can sometimes be computationally expensive" +
+      "plan to optimize them. Constraint propagation can sometimes be computationally expensive " +
       "for certain kinds of query plans (such as those with a large number of predicates and " +
       "aliases) which might negatively impact overall runtime.")
     .booleanConf
@@ -260,6 +263,15 @@ object SQLConf {
     .booleanConf
     .createWithDefault(false)
 
+  val FILE_COMPRESSION_FACTOR = buildConf("spark.sql.sources.fileCompressionFactor")
+    .internal()
+    .doc("When estimating the output data size of a table scan, multiply the file size with this " +
+      "factor as the estimated data size, in case the data is compressed in the file and leads " +
+      "to a heavily underestimated result.")
+    .doubleConf
+    .checkValue(_ > 0, "the value of fileCompressionFactor must be larger than 0")
+    .createWithDefault(1.0)
+
   val PARQUET_SCHEMA_MERGING_ENABLED = buildConf("spark.sql.parquet.mergeSchema")
     .doc("When true, the Parquet data source merges schemas collected from all data files, " +
       "otherwise the schema is picked from the summary file or a random data file " +
@@ -291,6 +303,13 @@ object SQLConf {
     .booleanConf
     .createWithDefault(true)
 
+  val PARQUET_INT96_TIMESTAMP_CONVERSION = buildConf("spark.sql.parquet.int96TimestampConversion")
+    .doc("This controls whether timestamp adjustments should be applied to INT96 data when " +
+      "converting to timestamps, for data written by Impala. This is necessary because Impala " +
+      "stores INT96 data with a different timezone offset than Hive & Spark.")
+    .booleanConf
+    .createWithDefault(false)
+
   object ParquetOutputTimestampType extends Enumeration {
     val INT96, TIMESTAMP_MICROS, TIMESTAMP_MILLIS = Value
   }
@@ -321,11 +340,14 @@ object SQLConf {
     .createWithDefault(false)
 
   val PARQUET_COMPRESSION = buildConf("spark.sql.parquet.compression.codec")
-    .doc("Sets the compression codec use when writing Parquet files. Acceptable values include: " +
-      "uncompressed, snappy, gzip, lzo.")
+    .doc("Sets the compression codec used when writing Parquet files. If either `compression` or " +
+      "`parquet.compression` is specified in the table-specific options/properties, the " +
+      "precedence would be `compression`, `parquet.compression`, " +
+      "`spark.sql.parquet.compression.codec`. Acceptable values include: none, uncompressed, " +
+      "snappy, gzip, lzo.")
     .stringConf
     .transform(_.toLowerCase(Locale.ROOT))
-    .checkValues(Set("uncompressed", "snappy", "gzip", "lzo"))
+    .checkValues(Set("none", "uncompressed", "snappy", "gzip", "lzo"))
     .createWithDefault("snappy")
 
   val PARQUET_FILTER_PUSHDOWN_ENABLED = buildConf("spark.sql.parquet.filterPushdown")
@@ -334,8 +356,8 @@
     .createWithDefault(true)
 
   val PARQUET_WRITE_LEGACY_FORMAT = buildConf("spark.sql.parquet.writeLegacyFormat")
-    .doc("Whether to follow Parquet's format specification when converting Parquet schema to " +
-      "Spark SQL schema and vice versa.")
+    .doc("Whether to be compatible with the legacy Parquet format adopted by Spark 1.4 and prior " +
+      "versions, when converting Parquet schema to Spark SQL schema and vice versa.")
    .booleanConf
    .createWithDefault(false)
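The precedence described in the new Parquet doc string is visible on the write path; a quick sketch, assuming an active SparkSession named `spark` and an illustrative output path:

    // The per-write option takes precedence over the session-level conf.
    spark.conf.set("spark.sql.parquet.compression.codec", "snappy")
    spark.range(10).write
      .option("compression", "gzip") // wins over spark.sql.parquet.compression.codec
      .parquet("/tmp/compression-demo")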
@@ -367,13 +389,35 @@
     .createWithDefault(true)
 
   val ORC_COMPRESSION = buildConf("spark.sql.orc.compression.codec")
-    .doc("Sets the compression codec use when writing ORC files. Acceptable values include: " +
-      "none, uncompressed, snappy, zlib, lzo.")
+    .doc("Sets the compression codec used when writing ORC files. If either `compression` or " +
+      "`orc.compress` is specified in the table-specific options/properties, the precedence " +
+      "would be `compression`, `orc.compress`, `spark.sql.orc.compression.codec`. " +
+      "Acceptable values include: none, uncompressed, snappy, zlib, lzo.")
     .stringConf
     .transform(_.toLowerCase(Locale.ROOT))
     .checkValues(Set("none", "uncompressed", "snappy", "zlib", "lzo"))
     .createWithDefault("snappy")
 
+  val ORC_IMPLEMENTATION = buildConf("spark.sql.orc.impl")
+    .doc("When native, use the native version of ORC support instead of the ORC library in Hive " +
+      "1.2.1. It is 'hive' by default prior to Spark 2.3.")
+    .internal()
+    .stringConf
+    .checkValues(Set("hive", "native"))
+    .createWithDefault("native")
+
+  val ORC_VECTORIZED_READER_ENABLED = buildConf("spark.sql.orc.enableVectorizedReader")
+    .doc("Enables vectorized ORC decoding.")
+    .booleanConf
+    .createWithDefault(true)
+
+  val ORC_COPY_BATCH_TO_SPARK = buildConf("spark.sql.orc.copyBatchToSpark")
+    .doc("Whether or not to copy the ORC columnar batch to the Spark columnar batch in the " +
+      "vectorized ORC reader.")
+    .internal()
+    .booleanConf
+    .createWithDefault(false)
+
   val ORC_FILTER_PUSHDOWN_ENABLED = buildConf("spark.sql.orc.filterPushdown")
     .doc("When true, enable filter pushdown for ORC files.")
     .booleanConf
@@ -1031,6 +1075,60 @@
     .booleanConf
     .createWithDefault(true)
 
+  val SQL_STRING_REDACTION_PATTERN =
+    ConfigBuilder("spark.sql.redaction.string.regex")
+      .doc("Regex to decide which parts of strings produced by Spark contain sensitive " +
+        "information. When this regex matches a string part, that string part is replaced by a " +
+        "dummy value. This is currently used to redact the output of SQL explain commands. " +
+        "When this conf is not set, the value from `spark.redaction.string.regex` is used.")
+      .fallbackConf(org.apache.spark.internal.config.STRING_REDACTION_PATTERN)
+
+  val CONCAT_BINARY_AS_STRING = buildConf("spark.sql.function.concatBinaryAsString")
+    .doc("When this option is set to false and all inputs are binary, `functions.concat` " +
+      "returns an output as binary. Otherwise, it returns the output as a string.")
+    .booleanConf
+    .createWithDefault(false)
+
+  val ELT_OUTPUT_AS_STRING = buildConf("spark.sql.function.eltOutputAsString")
+    .doc("When this option is set to false and all inputs are binary, `elt` returns " +
+      "an output as binary. Otherwise, it returns the output as a string.")
+    .booleanConf
+    .createWithDefault(false)
+
+  val CONTINUOUS_STREAMING_EXECUTOR_QUEUE_SIZE =
+    buildConf("spark.sql.streaming.continuous.executorQueueSize")
+      .internal()
+      .doc("The size (measured in number of rows) of the queue used in continuous execution " +
+        "to buffer the results of a ContinuousDataReader.")
+      .intConf
+      .createWithDefault(1024)
+
+  val CONTINUOUS_STREAMING_EXECUTOR_POLL_INTERVAL_MS =
+    buildConf("spark.sql.streaming.continuous.executorPollIntervalMs")
+      .internal()
+      .doc("The interval at which continuous execution readers will poll to check whether " +
+        "the epoch has advanced on the driver.")
+      .timeConf(TimeUnit.MILLISECONDS)
+      .createWithDefault(100)
+
+  object PartitionOverwriteMode extends Enumeration {
+    val STATIC, DYNAMIC = Value
+  }
+
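As a usage illustration for the redaction conf defined above (a sketch; `spark` is an assumed SparkSession and the regex is hypothetical):

    // Fragments matching the pattern show up as a dummy value in explain output.
    spark.conf.set("spark.sql.redaction.string.regex", "(?i)password=\\S+")
    spark.sql("SELECT 1").explain()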
+  val PARTITION_OVERWRITE_MODE =
+    buildConf("spark.sql.sources.partitionOverwriteMode")
+      .doc("When INSERT OVERWRITE a partitioned data source table, we currently support 2 modes: " +
+        "static and dynamic. In static mode, Spark deletes all the partitions that match the " +
+        "partition specification (e.g. PARTITION(a=1,b)) in the INSERT statement, before " +
+        "overwriting. In dynamic mode, Spark doesn't delete partitions ahead, and only " +
+        "overwrites those partitions that have data written into them at runtime. By default we " +
+        "use static mode to keep the same behavior of Spark prior to 2.3. Note that this config " +
+        "doesn't affect Hive serde tables, as they are always overwritten with dynamic mode.")
+      .stringConf
+      .transform(_.toUpperCase(Locale.ROOT))
+      .checkValues(PartitionOverwriteMode.values.map(_.toString))
+      .createWithDefault(PartitionOverwriteMode.STATIC.toString)
+
   object Deprecated {
     val MAPRED_REDUCE_TASKS = "mapred.reduce.tasks"
   }
@@ -1052,6 +1150,12 @@ class SQLConf extends Serializable with Logging {
 
   import SQLConf._
 
+  if (Utils.isTesting && SparkEnv.get != null) {
+    // Assert that we're only accessing it on the driver.
+    assert(SparkEnv.get.executorId == SparkContext.DRIVER_IDENTIFIER,
+      "SQLConf should only be created and accessed on the driver.")
+  }
+
   /** Only low degree of contention is expected for conf, thus NOT using ConcurrentHashMap. */
   @transient protected[spark] val settings = java.util.Collections.synchronizedMap(
     new java.util.HashMap[String, String]())
@@ -1111,6 +1215,8 @@ class SQLConf extends Serializable with Logging {
 
   def orcCompressionCodec: String = getConf(ORC_COMPRESSION)
 
+  def orcVectorizedReaderEnabled: Boolean = getConf(ORC_VECTORIZED_READER_ENABLED)
+
   def parquetCompressionCodec: String = getConf(PARQUET_COMPRESSION)
 
   def parquetVectorizedReaderEnabled: Boolean = getConf(PARQUET_VECTORIZED_READER_ENABLED)
@@ -1171,6 +1277,10 @@ class SQLConf extends Serializable with Logging {
 
   def escapedStringLiterals: Boolean = getConf(ESCAPED_STRING_LITERALS)
 
+  def fileCompressionFactor: Double = getConf(FILE_COMPRESSION_FACTOR)
+
+  def stringRedactionPattern: Option[Regex] = SQL_STRING_REDACTION_PATTERN.readFrom(reader)
+
   /**
    * Returns the [[Resolver]] for the current configuration, which can be used to determine if two
    * identifiers are equal.
@@ -1211,6 +1321,8 @@ class SQLConf extends Serializable with Logging {
 
   def isParquetINT96AsTimestamp: Boolean = getConf(PARQUET_INT96_AS_TIMESTAMP)
 
+  def isParquetINT96TimestampConversion: Boolean = getConf(PARQUET_INT96_TIMESTAMP_CONVERSION)
+
   def isParquetINT64AsTimestampMillis: Boolean = getConf(PARQUET_INT64_AS_TIMESTAMP_MILLIS)
 
   def parquetOutputTimestampType: ParquetOutputTimestampType.Value = {
@@ -1352,6 +1464,18 @@ class SQLConf extends Serializable with Logging {
 
   def replaceExceptWithFilter: Boolean = getConf(REPLACE_EXCEPT_WITH_FILTER)
 
+  def continuousStreamingExecutorQueueSize: Int = getConf(CONTINUOUS_STREAMING_EXECUTOR_QUEUE_SIZE)
+
+  def continuousStreamingExecutorPollIntervalMs: Long =
+    getConf(CONTINUOUS_STREAMING_EXECUTOR_POLL_INTERVAL_MS)
+
+  def concatBinaryAsString: Boolean = getConf(CONCAT_BINARY_AS_STRING)
+
+  def eltOutputAsString: Boolean = getConf(ELT_OUTPUT_AS_STRING)
+
+  def partitionOverwriteMode: PartitionOverwriteMode.Value =
+    PartitionOverwriteMode.withName(getConf(PARTITION_OVERWRITE_MODE))
+
   /** ********************** SQLConf functionality methods ************ */
 
   /** Set Spark SQL configuration properties. */
@@ -1385,7 +1509,7 @@ class SQLConf extends Serializable with Logging {
     Option(settings.get(key)).
       orElse {
         // Try to use the default value
-        Option(sqlConfEntries.get(key)).map(_.defaultValueString)
+        Option(sqlConfEntries.get(key)).map { e => e.stringConverter(e.readFrom(reader)) }
       }.
       getOrElse(throw new NoSuchElementException(key))
   }
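A usage sketch for dynamic partition overwrite (hypothetical tables `t` and `staging`; `spark` is an assumed SparkSession):

    // Only the partitions that actually receive rows are replaced; the rest are kept.
    spark.conf.set("spark.sql.sources.partitionOverwriteMode", "dynamic")
    spark.sql("INSERT OVERWRITE TABLE t PARTITION (a) SELECT value, a FROM staging")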
@@ -1423,14 +1547,21 @@ class SQLConf extends Serializable with Logging {
    * not set yet, return `defaultValue`.
    */
   def getConfString(key: String, defaultValue: String): String = {
-    if (defaultValue != null && defaultValue != "") {
+    if (defaultValue != null && defaultValue != ConfigEntry.UNDEFINED) {
       val entry = sqlConfEntries.get(key)
       if (entry != null) {
         // Only verify configs in the SQLConf object
         entry.valueConverter(defaultValue)
       }
     }
-    Option(settings.get(key)).getOrElse(defaultValue)
+    Option(settings.get(key)).getOrElse {
+      // If the key is not set, we need to check whether the config entry is registered and is
+      // a fallback conf, so that we can check its parent.
+      sqlConfEntries.get(key) match {
+        case e: FallbackConfigEntry[_] => getConfString(e.fallback.key, defaultValue)
+        case _ => defaultValue
+      }
+    }
   }
 
   /**
@@ -1446,7 +1577,8 @@ class SQLConf extends Serializable with Logging {
    */
   def getAllDefinedConfs: Seq[(String, String, String)] = sqlConfEntries.synchronized {
     sqlConfEntries.values.asScala.filter(_.isPublic).map { entry =>
-      (entry.key, getConfString(entry.key, entry.defaultValueString), entry.doc)
+      val displayValue = Option(getConfString(entry.key, null)).getOrElse(entry.defaultValueString)
+      (entry.key, displayValue, entry.doc)
     }.toSeq
   }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala
index af2bb44332dd4..a7f594837d3cf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala
@@ -99,4 +99,11 @@ object StaticSQLConf {
     .stringConf
     .toSequence
     .createOptional
+
+  val UI_RETAINED_EXECUTIONS =
+    buildStaticConf("spark.sql.ui.retainedExecutions")
+      .doc("Number of executions to retain in the Spark UI.")
+      .intConf
+      .createWithDefault(1000)
+
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/PartitioningSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/PartitioningSuite.scala
deleted file mode 100644
index 5b802ccc637dd..0000000000000
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/PartitioningSuite.scala
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -package org.apache.spark.sql.catalyst - -import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.catalyst.expressions.{InterpretedMutableProjection, Literal} -import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, HashPartitioning} - -class PartitioningSuite extends SparkFunSuite { - test("HashPartitioning compatibility should be sensitive to expression ordering (SPARK-9785)") { - val expressions = Seq(Literal(2), Literal(3)) - // Consider two HashPartitionings that have the same _set_ of hash expressions but which are - // created with different orderings of those expressions: - val partitioningA = HashPartitioning(expressions, 100) - val partitioningB = HashPartitioning(expressions.reverse, 100) - // These partitionings are not considered equal: - assert(partitioningA != partitioningB) - // However, they both satisfy the same clustered distribution: - val distribution = ClusteredDistribution(expressions) - assert(partitioningA.satisfies(distribution)) - assert(partitioningB.satisfies(distribution)) - // These partitionings compute different hashcodes for the same input row: - def computeHashCode(partitioning: HashPartitioning): Int = { - val hashExprProj = new InterpretedMutableProjection(partitioning.expressions, Seq.empty) - hashExprProj.apply(InternalRow.empty).hashCode() - } - assert(computeHashCode(partitioningA) != computeHashCode(partitioningB)) - // Thus, these partitionings are incompatible: - assert(!partitioningA.compatibleWith(partitioningB)) - assert(!partitioningB.compatibleWith(partitioningA)) - assert(!partitioningA.guarantees(partitioningB)) - assert(!partitioningB.guarantees(partitioningA)) - - // Just to be sure that we haven't cheated by having these methods always return false, - // check that identical partitionings are still compatible with and guarantee each other: - assert(partitioningA === partitioningA) - assert(partitioningA.guarantees(partitioningA)) - assert(partitioningA.compatibleWith(partitioningA)) - } -} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala index 23e866cdf4917..8c3db48a01f12 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala @@ -356,4 +356,13 @@ class ScalaReflectionSuite extends SparkFunSuite { assert(deserializerFor[Int].isInstanceOf[AssertNotNull]) assert(!deserializerFor[String].isInstanceOf[AssertNotNull]) } + + test("SPARK-23025: schemaFor should support Null type") { + val schema = schemaFor[(Int, Null)] + assert(schema === Schema( + StructType(Seq( + StructField("_1", IntegerType, nullable = false), + StructField("_2", NullType, nullable = true))), + nullable = true)) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index e56a5d6368318..f4514205d3ae0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -27,6 +27,8 @@ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.{Cross, Inner} import org.apache.spark.sql.catalyst.plans.logical._ +import 
org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning,
+  RangePartitioning, RoundRobinPartitioning}
 import org.apache.spark.sql.types._
 
@@ -514,4 +516,45 @@ class AnalysisSuite extends AnalysisTest with Matchers {
       Seq("Number of column aliases does not match number of columns. " +
         "Number of column aliases: 5; number of columns: 4."))
   }
+
+  test("SPARK-22614 RepartitionByExpression partitioning") {
+    def checkPartitioning[T <: Partitioning](numPartitions: Int, exprs: Expression*): Unit = {
+      val partitioning = RepartitionByExpression(exprs, testRelation2, numPartitions).partitioning
+      assert(partitioning.isInstanceOf[T])
+    }
+
+    checkPartitioning[HashPartitioning](numPartitions = 10, exprs = Literal(20))
+    checkPartitioning[HashPartitioning](numPartitions = 10, exprs = 'a.attr, 'b.attr)
+
+    checkPartitioning[RangePartitioning](numPartitions = 10,
+      exprs = SortOrder(Literal(10), Ascending))
+    checkPartitioning[RangePartitioning](numPartitions = 10,
+      exprs = SortOrder('a.attr, Ascending), SortOrder('b.attr, Descending))
+
+    checkPartitioning[RoundRobinPartitioning](numPartitions = 10, exprs = Seq.empty: _*)
+
+    intercept[IllegalArgumentException] {
+      checkPartitioning(numPartitions = 0, exprs = Literal(20))
+    }
+    intercept[IllegalArgumentException] {
+      checkPartitioning(numPartitions = -1, exprs = Literal(20))
+    }
+    intercept[IllegalArgumentException] {
+      checkPartitioning(numPartitions = 10, exprs = SortOrder('a.attr, Ascending), 'b.attr)
+    }
+  }
+
+  test("SPARK-20392: analysis barrier") {
+    // [[AnalysisBarrier]] will be removed after analysis
+    checkAnalysis(
+      Project(Seq(UnresolvedAttribute("tbl.a")),
+        AnalysisBarrier(SubqueryAlias("tbl", testRelation))),
+      Project(testRelation.output, SubqueryAlias("tbl", testRelation)))
+
+    // Verify we won't go through a plan wrapped in a barrier. Since we wrap an unresolved
+    // plan and the analyzer won't go through it, it remains unresolved.
+ val barrier = AnalysisBarrier(Project(Seq(UnresolvedAttribute("tbl.b")), + SubqueryAlias("tbl", testRelation))) + assertAnalysisError(barrier, Seq("cannot resolve '`tbl.b`'")) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index 5dcd653e9b341..52a7ebdafd7c7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -869,6 +869,114 @@ class TypeCoercionSuite extends AnalysisTest { Literal.create(null, IntegerType), Literal.create(null, StringType)))) } + test("type coercion for Concat") { + val rule = TypeCoercion.ConcatCoercion(conf) + + ruleTest(rule, + Concat(Seq(Literal("ab"), Literal("cde"))), + Concat(Seq(Literal("ab"), Literal("cde")))) + ruleTest(rule, + Concat(Seq(Literal(null), Literal("abc"))), + Concat(Seq(Cast(Literal(null), StringType), Literal("abc")))) + ruleTest(rule, + Concat(Seq(Literal(1), Literal("234"))), + Concat(Seq(Cast(Literal(1), StringType), Literal("234")))) + ruleTest(rule, + Concat(Seq(Literal("1"), Literal("234".getBytes()))), + Concat(Seq(Literal("1"), Cast(Literal("234".getBytes()), StringType)))) + ruleTest(rule, + Concat(Seq(Literal(1L), Literal(2.toByte), Literal(0.1))), + Concat(Seq(Cast(Literal(1L), StringType), Cast(Literal(2.toByte), StringType), + Cast(Literal(0.1), StringType)))) + ruleTest(rule, + Concat(Seq(Literal(true), Literal(0.1f), Literal(3.toShort))), + Concat(Seq(Cast(Literal(true), StringType), Cast(Literal(0.1f), StringType), + Cast(Literal(3.toShort), StringType)))) + ruleTest(rule, + Concat(Seq(Literal(1L), Literal(0.1))), + Concat(Seq(Cast(Literal(1L), StringType), Cast(Literal(0.1), StringType)))) + ruleTest(rule, + Concat(Seq(Literal(Decimal(10)))), + Concat(Seq(Cast(Literal(Decimal(10)), StringType)))) + ruleTest(rule, + Concat(Seq(Literal(BigDecimal.valueOf(10)))), + Concat(Seq(Cast(Literal(BigDecimal.valueOf(10)), StringType)))) + ruleTest(rule, + Concat(Seq(Literal(java.math.BigDecimal.valueOf(10)))), + Concat(Seq(Cast(Literal(java.math.BigDecimal.valueOf(10)), StringType)))) + ruleTest(rule, + Concat(Seq(Literal(new java.sql.Date(0)), Literal(new Timestamp(0)))), + Concat(Seq(Cast(Literal(new java.sql.Date(0)), StringType), + Cast(Literal(new Timestamp(0)), StringType)))) + + withSQLConf("spark.sql.function.concatBinaryAsString" -> "true") { + ruleTest(rule, + Concat(Seq(Literal("123".getBytes), Literal("456".getBytes))), + Concat(Seq(Cast(Literal("123".getBytes), StringType), + Cast(Literal("456".getBytes), StringType)))) + } + + withSQLConf("spark.sql.function.concatBinaryAsString" -> "false") { + ruleTest(rule, + Concat(Seq(Literal("123".getBytes), Literal("456".getBytes))), + Concat(Seq(Literal("123".getBytes), Literal("456".getBytes)))) + } + } + + test("type coercion for Elt") { + val rule = TypeCoercion.EltCoercion(conf) + + ruleTest(rule, + Elt(Seq(Literal(1), Literal("ab"), Literal("cde"))), + Elt(Seq(Literal(1), Literal("ab"), Literal("cde")))) + ruleTest(rule, + Elt(Seq(Literal(1.toShort), Literal("ab"), Literal("cde"))), + Elt(Seq(Cast(Literal(1.toShort), IntegerType), Literal("ab"), Literal("cde")))) + ruleTest(rule, + Elt(Seq(Literal(2), Literal(null), Literal("abc"))), + Elt(Seq(Literal(2), Cast(Literal(null), StringType), Literal("abc")))) + ruleTest(rule, + Elt(Seq(Literal(2), Literal(1), Literal("234"))), + 
Elt(Seq(Literal(2), Cast(Literal(1), StringType), Literal("234")))) + ruleTest(rule, + Elt(Seq(Literal(3), Literal(1L), Literal(2.toByte), Literal(0.1))), + Elt(Seq(Literal(3), Cast(Literal(1L), StringType), Cast(Literal(2.toByte), StringType), + Cast(Literal(0.1), StringType)))) + ruleTest(rule, + Elt(Seq(Literal(2), Literal(true), Literal(0.1f), Literal(3.toShort))), + Elt(Seq(Literal(2), Cast(Literal(true), StringType), Cast(Literal(0.1f), StringType), + Cast(Literal(3.toShort), StringType)))) + ruleTest(rule, + Elt(Seq(Literal(1), Literal(1L), Literal(0.1))), + Elt(Seq(Literal(1), Cast(Literal(1L), StringType), Cast(Literal(0.1), StringType)))) + ruleTest(rule, + Elt(Seq(Literal(1), Literal(Decimal(10)))), + Elt(Seq(Literal(1), Cast(Literal(Decimal(10)), StringType)))) + ruleTest(rule, + Elt(Seq(Literal(1), Literal(BigDecimal.valueOf(10)))), + Elt(Seq(Literal(1), Cast(Literal(BigDecimal.valueOf(10)), StringType)))) + ruleTest(rule, + Elt(Seq(Literal(1), Literal(java.math.BigDecimal.valueOf(10)))), + Elt(Seq(Literal(1), Cast(Literal(java.math.BigDecimal.valueOf(10)), StringType)))) + ruleTest(rule, + Elt(Seq(Literal(2), Literal(new java.sql.Date(0)), Literal(new Timestamp(0)))), + Elt(Seq(Literal(2), Cast(Literal(new java.sql.Date(0)), StringType), + Cast(Literal(new Timestamp(0)), StringType)))) + + withSQLConf("spark.sql.function.eltOutputAsString" -> "true") { + ruleTest(rule, + Elt(Seq(Literal(1), Literal("123".getBytes), Literal("456".getBytes))), + Elt(Seq(Literal(1), Cast(Literal("123".getBytes), StringType), + Cast(Literal("456".getBytes), StringType)))) + } + + withSQLConf("spark.sql.function.eltOutputAsString" -> "false") { + ruleTest(rule, + Elt(Seq(Literal(1), Literal("123".getBytes), Literal("456".getBytes))), + Elt(Seq(Literal(1), Literal("123".getBytes), Literal("456".getBytes)))) + } + } + test("BooleanEquality type cast") { val be = TypeCoercion.BooleanEquality // Use something more than a literal to avoid triggering the simplification rules. 
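The coercion rules exercised above are observable end to end; a sketch assuming an active SparkSession `spark` (X'..' is a SQL binary literal):

    spark.conf.set("spark.sql.function.concatBinaryAsString", "false")
    spark.sql("SELECT concat(X'12', X'34')").printSchema() // column type: binary

    spark.conf.set("spark.sql.function.concatBinaryAsString", "true")
    spark.sql("SELECT concat(X'12', X'34')").printSchema() // column type: string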
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala index 95c87ffa20cb7..6abab0073cca3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala @@ -279,7 +279,7 @@ abstract class SessionCatalogSuite extends AnalysisTest { } } - test("create temp table") { + test("create temp view") { withBasicCatalog { catalog => val tempTable1 = Range(1, 10, 1, 10) val tempTable2 = Range(1, 20, 2, 10) @@ -288,11 +288,11 @@ abstract class SessionCatalogSuite extends AnalysisTest { assert(catalog.getTempView("tbl1") == Option(tempTable1)) assert(catalog.getTempView("tbl2") == Option(tempTable2)) assert(catalog.getTempView("tbl3").isEmpty) - // Temporary table already exists + // Temporary view already exists intercept[TempTableAlreadyExistsException] { catalog.createTempView("tbl1", tempTable1, overrideIfExists = false) } - // Temporary table already exists but we override it + // Temporary view already exists but we override it catalog.createTempView("tbl1", tempTable2, overrideIfExists = true) assert(catalog.getTempView("tbl1") == Option(tempTable2)) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala index fb759eba6a9e2..6edb4348f8309 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala @@ -23,6 +23,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.types._ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { @@ -343,4 +344,14 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(Least(inputsExpr), "s" * 1, EmptyRow) checkEvaluation(Greatest(inputsExpr), "s" * N, EmptyRow) } + + test("SPARK-22704: Least and greatest use less global variables") { + val ctx1 = new CodegenContext() + Least(Seq(Literal(1), Literal(1))).genCode(ctx1) + assert(ctx1.inlinedMutableStates.size == 1) + + val ctx2 = new CodegenContext() + Greatest(Seq(Literal(1), Literal(1))).genCode(ctx2) + assert(ctx2.inlinedMutableStates.size == 1) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 7837d6529d12b..5b25bdf907c3a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -23,6 +23,7 @@ import java.util.{Calendar, Locale, TimeZone} import org.apache.spark.SparkFunSuite import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import 
org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimeZoneGMT @@ -845,4 +846,80 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { val outputOuter = Row.fromSeq((1 to N).map(_ => outputInner)) checkEvaluation(cast(Literal.create(inputOuter, fromOuter), toOuter), outputOuter) } + + test("SPARK-22570: Cast should not create a lot of global variables") { + val ctx = new CodegenContext + cast("1", IntegerType).genCode(ctx) + cast("2", LongType).genCode(ctx) + assert(ctx.inlinedMutableStates.length == 0) + } + + test("SPARK-22825 Cast array to string") { + val ret1 = cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType) + checkEvaluation(ret1, "[1, 2, 3, 4, 5]") + val ret2 = cast(Literal.create(Array("ab", "cde", "f")), StringType) + checkEvaluation(ret2, "[ab, cde, f]") + val ret3 = cast(Literal.create(Array("ab", null, "c")), StringType) + checkEvaluation(ret3, "[ab,, c]") + val ret4 = cast(Literal.create(Array("ab".getBytes, "cde".getBytes, "f".getBytes)), StringType) + checkEvaluation(ret4, "[ab, cde, f]") + val ret5 = cast( + Literal.create(Array("2014-12-03", "2014-12-04", "2014-12-06").map(Date.valueOf)), + StringType) + checkEvaluation(ret5, "[2014-12-03, 2014-12-04, 2014-12-06]") + val ret6 = cast( + Literal.create(Array("2014-12-03 13:01:00", "2014-12-04 15:05:00").map(Timestamp.valueOf)), + StringType) + checkEvaluation(ret6, "[2014-12-03 13:01:00, 2014-12-04 15:05:00]") + val ret7 = cast(Literal.create(Array(Array(1, 2, 3), Array(4, 5))), StringType) + checkEvaluation(ret7, "[[1, 2, 3], [4, 5]]") + val ret8 = cast( + Literal.create(Array(Array(Array("a"), Array("b", "c")), Array(Array("d")))), + StringType) + checkEvaluation(ret8, "[[[a], [b, c]], [[d]]]") + } + + test("SPARK-22973 Cast map to string") { + val ret1 = cast(Literal.create(Map(1 -> "a", 2 -> "b", 3 -> "c")), StringType) + checkEvaluation(ret1, "[1 -> a, 2 -> b, 3 -> c]") + val ret2 = cast( + Literal.create(Map("1" -> "a".getBytes, "2" -> null, "3" -> "c".getBytes)), + StringType) + checkEvaluation(ret2, "[1 -> a, 2 ->, 3 -> c]") + val ret3 = cast( + Literal.create(Map( + 1 -> Date.valueOf("2014-12-03"), + 2 -> Date.valueOf("2014-12-04"), + 3 -> Date.valueOf("2014-12-05"))), + StringType) + checkEvaluation(ret3, "[1 -> 2014-12-03, 2 -> 2014-12-04, 3 -> 2014-12-05]") + val ret4 = cast( + Literal.create(Map( + 1 -> Timestamp.valueOf("2014-12-03 13:01:00"), + 2 -> Timestamp.valueOf("2014-12-04 15:05:00"))), + StringType) + checkEvaluation(ret4, "[1 -> 2014-12-03 13:01:00, 2 -> 2014-12-04 15:05:00]") + val ret5 = cast( + Literal.create(Map( + 1 -> Array(1, 2, 3), + 2 -> Array(4, 5, 6))), + StringType) + checkEvaluation(ret5, "[1 -> [1, 2, 3], 2 -> [4, 5, 6]]") + } + + test("SPARK-22981 Cast struct to string") { + val ret1 = cast(Literal.create((1, "a", 0.1)), StringType) + checkEvaluation(ret1, "[1, a, 0.1]") + val ret2 = cast(Literal.create(Tuple3[Int, String, String](1, null, "a")), StringType) + checkEvaluation(ret2, "[1,, a]") + val ret3 = cast(Literal.create( + (Date.valueOf("2014-12-03"), Timestamp.valueOf("2014-12-03 15:05:00"))), StringType) + checkEvaluation(ret3, "[2014-12-03, 2014-12-03 15:05:00]") + val ret4 = cast(Literal.create(((1, "a"), 5, 0.1)), StringType) + checkEvaluation(ret4, "[[1, a], 5, 0.1]") + val ret5 = cast(Literal.create((Seq(1, 2, 3), "a", 0.1)), StringType) + checkEvaluation(ret5, "[[1, 2, 3], a, 0.1]") + val ret6 = cast(Literal.create((1, Map(1 -> "a", 2 -> "b", 3 -> "c"))), StringType) + checkEvaluation(ret6, "[1, [1 
-> a, 2 -> b, 3 -> c]]") + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index a4198f826cedb..676ba3956ddc8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen._ -import org.apache.spark.sql.catalyst.expressions.objects.{AssertNotNull, CreateExternalRow, GetExternalRowField, ValidateExternalType} +import org.apache.spark.sql.catalyst.expressions.objects._ import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -380,4 +380,60 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { s"Incorrect Evaluation: expressions: $exprAnd, actual: $actualAnd, expected: $expectedAnd") } } + + test("SPARK-22696: CreateExternalRow should not use global variables") { + val ctx = new CodegenContext + val schema = new StructType().add("a", IntegerType).add("b", StringType) + CreateExternalRow(Seq(Literal(1), Literal("x")), schema).genCode(ctx) + assert(ctx.inlinedMutableStates.isEmpty) + } + + test("SPARK-22696: InitializeJavaBean should not use global variables") { + val ctx = new CodegenContext + InitializeJavaBean(Literal.fromObject(new java.util.LinkedList[Int]), + Map("add" -> Literal(1))).genCode(ctx) + assert(ctx.inlinedMutableStates.isEmpty) + } + + test("SPARK-22716: addReferenceObj should not add mutable states") { + val ctx = new CodegenContext + val foo = new Object() + ctx.addReferenceObj("foo", foo) + assert(ctx.inlinedMutableStates.isEmpty) + } + + test("SPARK-18016: define mutable states by using an array") { + val ctx1 = new CodegenContext + for (i <- 1 to CodeGenerator.OUTER_CLASS_VARIABLES_THRESHOLD + 10) { + ctx1.addMutableState(ctx1.JAVA_INT, "i", v => s"$v = $i;") + } + assert(ctx1.inlinedMutableStates.size == CodeGenerator.OUTER_CLASS_VARIABLES_THRESHOLD) + // When the number of primitive type mutable states is over the threshold, others are + // allocated into an array + assert(ctx1.arrayCompactedMutableStates.get(ctx1.JAVA_INT).get.arrayNames.size == 1) + assert(ctx1.mutableStateInitCode.size == CodeGenerator.OUTER_CLASS_VARIABLES_THRESHOLD + 10) + + val ctx2 = new CodegenContext + for (i <- 1 to CodeGenerator.MUTABLESTATEARRAY_SIZE_LIMIT + 10) { + ctx2.addMutableState("InternalRow[]", "r", v => s"$v = new InternalRow[$i];") + } + // When the number of non-primitive type mutable states is over the threshold, others are + // allocated into a new array + assert(ctx2.inlinedMutableStates.isEmpty) + assert(ctx2.arrayCompactedMutableStates.get("InternalRow[]").get.arrayNames.size == 2) + assert(ctx2.arrayCompactedMutableStates("InternalRow[]").getCurrentIndex == 10) + assert(ctx2.mutableStateInitCode.size == CodeGenerator.MUTABLESTATEARRAY_SIZE_LIMIT + 10) + } + + test("SPARK-22750: addImmutableStateIfNotExists") { + val ctx = new CodegenContext + val mutableState1 = "field1" + val mutableState2 = "field2" + ctx.addImmutableStateIfNotExists("int", mutableState1) + ctx.addImmutableStateIfNotExists("int", mutableState1) + 
ctx.addImmutableStateIfNotExists("String", mutableState2) + ctx.addImmutableStateIfNotExists("int", mutableState1) + ctx.addImmutableStateIfNotExists("String", mutableState2) + assert(ctx.inlinedMutableStates.length == 2) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index b0eaad1c80f89..84190f0bd5f7d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.analysis.UnresolvedExtractValue import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -299,4 +300,10 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { new StringToMap(Literal("a=1_b=2_c=3"), Literal("_"), NonFoldableLiteral("=")) .checkInputDataTypes().isFailure) } + + test("SPARK-22693: CreateNamedStruct should not use global variables") { + val ctx = new CodegenContext + CreateNamedStruct(Seq("a", "x", "b", 2.0)).genCode(ctx) + assert(ctx.inlinedMutableStates.isEmpty) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ConditionalExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ConditionalExpressionSuite.scala index 3e11c3d2d4fe3..a099119732e25 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ConditionalExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ConditionalExpressionSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.types._ class ConditionalExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { @@ -145,4 +146,10 @@ class ConditionalExpressionSuite extends SparkFunSuite with ExpressionEvalHelper IndexedSeq((Literal(12) === Literal(1), Literal(42)), (Literal(12) === Literal(42), Literal(1)))) } + + test("SPARK-22705: case when should use less global variables") { + val ctx = new CodegenContext() + CaseWhen(Seq((Literal.create(false, BooleanType), Literal(1))), Literal(-1)).genCode(ctx) + assert(ctx.inlinedMutableStates.size == 1) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 89d99f9678cda..786266a2c13c0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -22,6 +22,7 @@ import java.text.SimpleDateFormat import java.util.{Calendar, Locale, TimeZone} import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import 
org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimeZoneGMT @@ -527,7 +528,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { NextDay(Literal(Date.valueOf("2015-07-23")), Literal.create(null, StringType)), null) } - test("function trunc") { + test("TruncDate") { def testTrunc(input: Date, fmt: String, expected: Date): Unit = { checkEvaluation(TruncDate(Literal.create(input, DateType), Literal.create(fmt, StringType)), expected) @@ -543,11 +544,82 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { testTrunc(date, fmt, Date.valueOf("2015-07-01")) } testTrunc(date, "DD", null) + testTrunc(date, "SECOND", null) + testTrunc(date, "HOUR", null) testTrunc(date, null, null) testTrunc(null, "MON", null) testTrunc(null, null, null) } + test("TruncTimestamp") { + def testTrunc(input: Timestamp, fmt: String, expected: Timestamp): Unit = { + checkEvaluation( + TruncTimestamp(Literal.create(fmt, StringType), Literal.create(input, TimestampType)), + expected) + checkEvaluation( + TruncTimestamp( + NonFoldableLiteral.create(fmt, StringType), Literal.create(input, TimestampType)), + expected) + } + + withDefaultTimeZone(TimeZoneGMT) { + val inputDate = Timestamp.valueOf("2015-07-22 05:30:06") + + Seq("yyyy", "YYYY", "year", "YEAR", "yy", "YY").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-01-01 00:00:00")) + } + + Seq("month", "MONTH", "mon", "MON", "mm", "MM").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-01 00:00:00")) + } + + Seq("DAY", "day", "DD", "dd").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-22 00:00:00")) + } + + Seq("HOUR", "hour").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-22 05:00:00")) + } + + Seq("MINUTE", "minute").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-22 05:30:00")) + } + + Seq("SECOND", "second").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-22 05:30:06")) + } + + Seq("WEEK", "week").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-20 00:00:00")) + } + + Seq("QUARTER", "quarter").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-01 00:00:00")) + } + + testTrunc(inputDate, "INVALID", null) + testTrunc(inputDate, null, null) + testTrunc(null, "MON", null) + testTrunc(null, null, null) + } + } + test("from_unixtime") { val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS" @@ -720,6 +792,9 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test(null, "UTC", null) test("2015-07-24 00:00:00", null, null) test(null, null, null) + // Test escaping of timezone + GenerateUnsafeProjection.generate( + ToUTCTimestamp(Literal(Timestamp.valueOf("2015-07-24 00:00:00")), Literal("\"quote")) :: Nil) } test("from_utc_timestamp") { @@ -740,5 +815,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test(null, "UTC", null) test("2015-07-24 00:00:00", null, null) test(null, null, null) + // Test escaping of timezone + GenerateUnsafeProjection.generate(FromUTCTimestamp(Literal(0), Literal("\"quote")) :: Nil) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala index 
112a4a09728ae..4281c89ac475d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala @@ -27,6 +27,7 @@ import org.scalatest.exceptions.TestFailedException import org.apache.spark.SparkFunSuite import org.apache.spark.sql.{RandomDataGenerator, Row} +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.{ExamplePointUDT, RowEncoder} import org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils, GenericArrayData} @@ -620,23 +621,30 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } test("SPARK-18207: Compute hash for a lot of expressions") { + def checkResult(schema: StructType, input: InternalRow): Unit = { + val exprs = schema.fields.zipWithIndex.map { case (f, i) => + BoundReference(i, f.dataType, true) + } + val murmur3HashExpr = Murmur3Hash(exprs, 42) + val murmur3HashPlan = GenerateMutableProjection.generate(Seq(murmur3HashExpr)) + val murmursHashEval = Murmur3Hash(exprs, 42).eval(input) + assert(murmur3HashPlan(input).getInt(0) == murmursHashEval) + + val hiveHashExpr = HiveHash(exprs) + val hiveHashPlan = GenerateMutableProjection.generate(Seq(hiveHashExpr)) + val hiveHashEval = HiveHash(exprs).eval(input) + assert(hiveHashPlan(input).getInt(0) == hiveHashEval) + } + val N = 1000 val wideRow = new GenericInternalRow( Seq.tabulate(N)(i => UTF8String.fromString(i.toString)).toArray[Any]) - val schema = StructType((1 to N).map(i => StructField("", StringType))) - - val exprs = schema.fields.zipWithIndex.map { case (f, i) => - BoundReference(i, f.dataType, true) - } - val murmur3HashExpr = Murmur3Hash(exprs, 42) - val murmur3HashPlan = GenerateMutableProjection.generate(Seq(murmur3HashExpr)) - val murmursHashEval = Murmur3Hash(exprs, 42).eval(wideRow) - assert(murmur3HashPlan(wideRow).getInt(0) == murmursHashEval) + val schema = StructType((1 to N).map(i => StructField(i.toString, StringType))) + checkResult(schema, wideRow) - val hiveHashExpr = HiveHash(exprs) - val hiveHashPlan = GenerateMutableProjection.generate(Seq(hiveHashExpr)) - val hiveHashEval = HiveHash(exprs).eval(wideRow) - assert(hiveHashPlan(wideRow).getInt(0) == hiveHashEval) + val nestedRow = InternalRow(wideRow) + val nestedSchema = new StructType().add("nested", schema) + checkResult(nestedSchema, nestedRow) } test("SPARK-22284: Compute hash for nested structs") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscExpressionsSuite.scala index 4fe7b436982b1..facc863081303 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscExpressionsSuite.scala @@ -43,5 +43,4 @@ class MiscExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Length(Uuid()), 36) assert(evaluate(Uuid()) !== evaluate(Uuid())) } - } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullExpressionsSuite.scala index 40ef7770da33f..cc6c15cb2c909 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullExpressionsSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Project} import org.apache.spark.sql.types._ @@ -155,6 +156,12 @@ class NullExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Coalesce(inputs), "x_1") } + test("SPARK-22705: Coalesce should use less global variables") { + val ctx = new CodegenContext() + Coalesce(Seq(Literal("a"), Literal("b"))).genCode(ctx) + assert(ctx.inlinedMutableStates.size == 1) + } + test("AtLeastNNonNulls should not throw 64kb exception") { val inputs = (1 to 4000).map(x => Literal(s"x_$x")) checkEvaluation(AtLeastNNonNulls(1, inputs), true) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index 865092a659f26..8a8f8e10225fa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -25,6 +25,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.RandomDataGenerator import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExamplePointUDT +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData} import org.apache.spark.sql.types._ @@ -245,6 +246,12 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(In(Literal(1.0D), sets), true) } + test("SPARK-22705: In should use less global variables") { + val ctx = new CodegenContext() + In(Literal(1.0D), Seq(Literal(1.0D), Literal(2.0D))).genCode(ctx) + assert(ctx.inlinedMutableStates.isEmpty) + } + test("INSET") { val hS = HashSet[Any]() + 1 + 2 val nS = HashSet[Any]() + 1 + 2 + null @@ -291,26 +298,26 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { private val udt = new ExamplePointUDT private val smallValues = - Seq(1.toByte, 1.toShort, 1, 1L, Decimal(1), Array(1.toByte), new Date(2000, 1, 1), + Seq(1.toByte, 1.toShort, 1, 1L, Decimal(1), Array(1.toByte), Date.valueOf("2000-01-01"), new Timestamp(1), "a", 1f, 1d, 0f, 0d, false, Array(1L, 2L)) .map(Literal(_)) ++ Seq(Literal.create(MyStruct(1L, "b")), Literal.create(MyStruct2(MyStruct(1L, "a"), Array(1, 1))), Literal.create(ArrayData.toArrayData(Array(1.0, 2.0)), udt)) private val largeValues = - Seq(2.toByte, 2.toShort, 2, 2L, Decimal(2), Array(2.toByte), new Date(2000, 1, 2), + Seq(2.toByte, 2.toShort, 2, 2L, Decimal(2), Array(2.toByte), Date.valueOf("2000-01-02"), new Timestamp(2), "b", 2f, 2d, Float.NaN, Double.NaN, true, Array(2L, 1L)) .map(Literal(_)) ++ Seq(Literal.create(MyStruct(2L, "b")), Literal.create(MyStruct2(MyStruct(1L, "a"), Array(1, 2))), Literal.create(ArrayData.toArrayData(Array(1.0, 3.0)), udt)) private val equalValues1 = - Seq(1.toByte, 1.toShort, 1, 1L, Decimal(1), Array(1.toByte), new Date(2000, 1, 1), + 
Seq(1.toByte, 1.toShort, 1, 1L, Decimal(1), Array(1.toByte), Date.valueOf("2000-01-01"), new Timestamp(1), "a", 1f, 1d, Float.NaN, Double.NaN, true, Array(1L, 2L)) .map(Literal(_)) ++ Seq(Literal.create(MyStruct(1L, "b")), Literal.create(MyStruct2(MyStruct(1L, "a"), Array(1, 1))), Literal.create(ArrayData.toArrayData(Array(1.0, 2.0)), udt)) private val equalValues2 = - Seq(1.toByte, 1.toShort, 1, 1L, Decimal(1), Array(1.toByte), new Date(2000, 1, 1), + Seq(1.toByte, 1.toShort, 1, 1L, Decimal(1), Array(1.toByte), Date.valueOf("2000-01-01"), new Timestamp(1), "a", 1f, 1d, Float.NaN, Double.NaN, true, Array(1L, 2L)) .map(Literal(_)) ++ Seq(Literal.create(MyStruct(1L, "b")), Literal.create(MyStruct2(MyStruct(1L, "a"), Array(1, 1))), @@ -429,4 +436,10 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { val infinity = Literal(Double.PositiveInfinity) checkEvaluation(EqualTo(infinity, infinity), true) } + + test("SPARK-22693: InSet should not use global variables") { + val ctx = new CodegenContext + InSet(Literal(1), Set(1, 2, 3, 4)).genCode(ctx) + assert(ctx.inlinedMutableStates.isEmpty) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index 1ce150e091981..2a0a42c65b086 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -20,7 +20,8 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.dsl.expressions._ -import org.apache.spark.sql.types.{IntegerType, StringType} +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext +import org.apache.spark.sql.types.StringType /** * Unit tests for regular expression (regexp) related SQL expressions. 
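Many of the surrounding tests assert on `ctx.inlinedMutableStates`. The bookkeeping they exercise can be modeled with a small standalone sketch (illustrative names and threshold, not Spark's `CodegenContext`): below a threshold each piece of mutable state gets its own field; past it, same-typed states are compacted into shared arrays.

    import scala.collection.mutable

    class StateAllocator(inlineThreshold: Int) {
      private val inlined = mutable.ArrayBuffer.empty[String]
      private val compacted = mutable.Map.empty[String, Int] // javaType -> next free index

      /** Returns the generated-code reference for a new piece of mutable state. */
      def addMutableState(javaType: String, name: String): String =
        if (inlined.size < inlineThreshold) {
          inlined += s"$javaType $name"
          name // dedicated field
        } else {
          val idx = compacted.getOrElse(javaType, 0)
          compacted(javaType) = idx + 1
          s"${name}Array[$idx]" // slot in a shared, per-type array
        }
    }

    object StateAllocatorDemo extends App {
      val alloc = new StateAllocator(inlineThreshold = 2)
      println(alloc.addMutableState("int", "i0")) // i0
      println(alloc.addMutableState("int", "i1")) // i1
      println(alloc.addMutableState("int", "i2")) // i2Array[0]
    }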
@@ -178,6 +179,15 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(nonNullExpr, "num-num", row1) } + test("SPARK-22570: RegExpReplace should not create a lot of global variables") { + val ctx = new CodegenContext + RegExpReplace(Literal("100"), Literal("(\\d+)"), Literal("num")).genCode(ctx) + // four global variables (lastRegex, pattern, lastReplacement, and lastReplacementInUTF8) + // are always required, which are allocated in type-based global array + assert(ctx.inlinedMutableStates.length == 0) + assert(ctx.mutableStateInitCode.length == 4) + } + test("RegexExtract") { val row1 = create_row("100-200", "(\\d+)-(\\d+)", 1) val row2 = create_row("100-200", "(\\d+)-(\\d+)", 2) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDFSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDFSuite.scala index 13bd363c8b692..10e3ffd0dff97 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDFSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDFSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions import java.util.Locale import org.apache.spark.{SparkException, SparkFunSuite} +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.types.{IntegerType, StringType} class ScalaUDFSuite extends SparkFunSuite with ExpressionEvalHelper { @@ -47,4 +48,9 @@ class ScalaUDFSuite extends SparkFunSuite with ExpressionEvalHelper { assert(e2.getMessage.contains("Failed to execute user defined function")) } + test("SPARK-22695: ScalaUDF should not use global variables") { + val ctx = new CodegenContext + ScalaUDF((s: String) => s + "x", StringType, Literal("a") :: Nil).genCode(ctx) + assert(ctx.inlinedMutableStates.isEmpty) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index 54cde77176e27..97ddbeba2c5ca 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -51,6 +51,18 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Concat(strs.map(Literal.create(_, StringType))), strs.mkString, EmptyRow) } + test("SPARK-22771 Check Concat.checkInputDataTypes results") { + assert(Concat(Seq.empty[Expression]).checkInputDataTypes().isSuccess) + assert(Concat(Literal.create("a") :: Literal.create("b") :: Nil) + .checkInputDataTypes().isSuccess) + assert(Concat(Literal.create("a".getBytes) :: Literal.create("b".getBytes) :: Nil) + .checkInputDataTypes().isSuccess) + assert(Concat(Literal.create(1) :: Literal.create(2) :: Nil) + .checkInputDataTypes().isFailure) + assert(Concat(Literal.create("a") :: Literal.create("b".getBytes) :: Nil) + .checkInputDataTypes().isFailure) + } + test("concat_ws") { def testConcatWs(expected: String, sep: String, inputs: Any*): Unit = { val inputExprs = inputs.map { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountMinSketchAggSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountMinSketchAggSuite.scala index 10479630f3f99..30e3bc9fb5779 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountMinSketchAggSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountMinSketchAggSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.sketch.CountMinSketch /** - * Unit test suite for the count-min sketch SQL aggregate funciton [[CountMinSketchAgg]]. + * Unit test suite for the count-min sketch SQL aggregate function [[CountMinSketchAgg]]. */ class CountMinSketchAggSuite extends SparkFunSuite { private val childExpression = BoundReference(0, IntegerType, nullable = true) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoinerSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoinerSuite.scala index f203f25ad10d4..75c6beeb32150 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoinerSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoinerSuite.scala @@ -22,8 +22,10 @@ import scala.util.Random import org.apache.spark.SparkFunSuite import org.apache.spark.sql.RandomDataGenerator import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} -import org.apache.spark.sql.catalyst.expressions.UnsafeProjection +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.catalyst.expressions.{JoinedRow, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String /** * Test suite for [[GenerateUnsafeRowJoiner]]. @@ -45,6 +47,32 @@ class GenerateUnsafeRowJoinerSuite extends SparkFunSuite { testConcat(64, 64, fixed) } + test("rows with all empty strings") { + val schema = StructType(Seq( + StructField("f1", StringType), StructField("f2", StringType))) + val row: UnsafeRow = UnsafeProjection.create(schema).apply( + InternalRow(UTF8String.EMPTY_UTF8, UTF8String.EMPTY_UTF8)) + testConcat(schema, row, schema, row) + } + + test("rows with all empty int arrays") { + val schema = StructType(Seq( + StructField("f1", ArrayType(IntegerType)), StructField("f2", ArrayType(IntegerType)))) + val emptyIntArray = + ExpressionEncoder[Array[Int]]().resolveAndBind().toRow(Array.emptyIntArray).getArray(0) + val row: UnsafeRow = UnsafeProjection.create(schema).apply( + InternalRow(emptyIntArray, emptyIntArray)) + testConcat(schema, row, schema, row) + } + + test("alternating empty and non-empty strings") { + val schema = StructType(Seq( + StructField("f1", StringType), StructField("f2", StringType))) + val row: UnsafeRow = UnsafeProjection.create(schema).apply( + InternalRow(UTF8String.EMPTY_UTF8, UTF8String.fromString("foo"))) + testConcat(schema, row, schema, row) + } + test("randomized fix width types") { for (i <- 0 until 20) { testConcatOnce(Random.nextInt(100), Random.nextInt(100), fixed) @@ -94,27 +122,84 @@ class GenerateUnsafeRowJoinerSuite extends SparkFunSuite { val extRow2 = RandomDataGenerator.forType(schema2, nullable = false).get.apply() val row1 = converter1.apply(internalConverter1.apply(extRow1).asInstanceOf[InternalRow]) val row2 = converter2.apply(internalConverter2.apply(extRow2).asInstanceOf[InternalRow]) + testConcat(schema1, row1, schema2, row2) + } + + private def testConcat( + schema1: StructType, + row1: UnsafeRow, + schema2: StructType, + row2: UnsafeRow) { // Run the joiner. 
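+    // UnsafeRow layout, relied on by the byte-level checks below: a null-tracking bitset
+    // (8-byte aligned), then one 8-byte fixed-width slot per field, then a variable-length
+    // region that those fixed-width slots point into.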
val mergedSchema = StructType(schema1 ++ schema2) val concater = GenerateUnsafeRowJoiner.create(schema1, schema2) - val output = concater.join(row1, row2) + val output: UnsafeRow = concater.join(row1, row2) + + // We'll also compare to an UnsafeRow produced with JoinedRow + UnsafeProjection. This ensures + // that unused space in the row (e.g. leftover bits in the null-tracking bitmap) is written + // correctly. + val expectedOutput: UnsafeRow = { + val joinedRowProjection = UnsafeProjection.create(mergedSchema) + val joined = new JoinedRow() + joinedRowProjection.apply(joined.apply(row1, row2)) + } // Test everything equals ... for (i <- mergedSchema.indices) { + val dataType = mergedSchema(i).dataType if (i < schema1.size) { assert(output.isNullAt(i) === row1.isNullAt(i)) if (!output.isNullAt(i)) { - assert(output.get(i, mergedSchema(i).dataType) === row1.get(i, mergedSchema(i).dataType)) + assert(output.get(i, dataType) === row1.get(i, dataType)) + assert(output.get(i, dataType) === expectedOutput.get(i, dataType)) } } else { assert(output.isNullAt(i) === row2.isNullAt(i - schema1.size)) if (!output.isNullAt(i)) { - assert(output.get(i, mergedSchema(i).dataType) === - row2.get(i - schema1.size, mergedSchema(i).dataType)) + assert(output.get(i, dataType) === row2.get(i - schema1.size, dataType)) + assert(output.get(i, dataType) === expectedOutput.get(i, dataType)) } } } + + + assert( + expectedOutput.getSizeInBytes == output.getSizeInBytes, + "output isn't same size in bytes as slow path") + + // Compare the UnsafeRows byte-by-byte so that we can print more useful debug information in + // case this assertion fails: + val actualBytes = output.getBaseObject.asInstanceOf[Array[Byte]] + .take(output.getSizeInBytes) + val expectedBytes = expectedOutput.getBaseObject.asInstanceOf[Array[Byte]] + .take(expectedOutput.getSizeInBytes) + + val bitsetWidth = UnsafeRow.calculateBitSetWidthInBytes(expectedOutput.numFields()) + val actualBitset = actualBytes.take(bitsetWidth) + val expectedBitset = expectedBytes.take(bitsetWidth) + assert(actualBitset === expectedBitset, "bitsets were not equal") + + val fixedLengthSize = expectedOutput.numFields() * 8 + val actualFixedLength = actualBytes.slice(bitsetWidth, bitsetWidth + fixedLengthSize) + val expectedFixedLength = expectedBytes.slice(bitsetWidth, bitsetWidth + fixedLengthSize) + if (actualFixedLength !== expectedFixedLength) { + actualFixedLength.grouped(8) + .zip(expectedFixedLength.grouped(8)) + .zip(mergedSchema.fields.toIterator) + .foreach { + case ((actual, expected), field) => + assert(actual === expected, s"Fixed length sections are not equal for field $field") + } + fail("Fixed length sections were not equal") + } + + val variableLengthStart = bitsetWidth + fixedLengthSize + val actualVariableLength = actualBytes.drop(variableLengthStart) + val expectedVariableLength = expectedBytes.drop(variableLengthStart) + assert(actualVariableLength === expectedVariableLength, "variable length sections were not equal") + + assert(output.hashCode() == expectedOutput.hashCode(), "hash codes were not equal") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala index 0cd0d8859145f..2c45b3b0c73d1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala +++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala @@ -208,4 +208,42 @@ class GeneratedProjectionSuite extends SparkFunSuite { unsafeProj.apply(InternalRow(InternalRow(UTF8String.fromString("b")))) assert(row.getStruct(0, 1).getString(0).toString == "a") } + + test("SPARK-22699: GenerateSafeProjection should not use global variables for struct") { + val safeProj = GenerateSafeProjection.generate( + Seq(BoundReference(0, new StructType().add("i", IntegerType), true))) + val globalVariables = safeProj.getClass.getDeclaredFields + // We always need 3 variables: + // - one is a reference to this + // - one is the references object + // - one is the mutableRow + assert(globalVariables.length == 3) + } + + test("SPARK-18016: generated projections on wider table requiring state compaction") { + val N = 6000 + val wideRow1 = new GenericInternalRow(new Array[Any](N)) + val schema1 = StructType((1 to N).map(i => StructField("", IntegerType))) + val wideRow2 = new GenericInternalRow( + Array.tabulate[Any](N)(i => UTF8String.fromString(i.toString))) + val schema2 = StructType((1 to N).map(i => StructField("", StringType))) + val joined = new JoinedRow(wideRow1, wideRow2) + val joinedSchema = StructType(schema1 ++ schema2) + val nested = new JoinedRow(InternalRow(joined, joined), joined) + val nestedSchema = StructType( + Seq(StructField("", joinedSchema), StructField("", joinedSchema)) ++ joinedSchema) + + val safeProj = FromUnsafeProjection(nestedSchema) + val result = safeProj(nested) + + // test generated MutableProjection + val exprs = nestedSchema.fields.zipWithIndex.map { case (f, i) => + BoundReference(i, f.dataType, true) + } + val mutableProj = GenerateMutableProjection.generate(exprs) + val row1 = mutableProj(result) + assert(result === row1) + val row2 = mutableProj(result) + assert(result === row2) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala index 77e4eff26c69b..3f41f4b144096 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala @@ -38,54 +38,64 @@ class ColumnPruningSuite extends PlanTest { CollapseProject) :: Nil } - test("Column pruning for Generate when Generate.join = false") { - val input = LocalRelation('a.int, 'b.array(StringType)) + test("Column pruning for Generate when Generate.unrequiredChildIndex = child.output") { + val input = LocalRelation('a.int, 'b.int, 'c.array(StringType)) - val query = input.generate(Explode('b), join = false).analyze + val query = + input + .generate(Explode('c), outputNames = "explode" :: Nil) + .select('c, 'explode) + .analyze val optimized = Optimize.execute(query) - val correctAnswer = input.select('b).generate(Explode('b), join = false).analyze + val correctAnswer = + input + .select('c) + .generate(Explode('c), outputNames = "explode" :: Nil) + .analyze comparePlans(optimized, correctAnswer) } - test("Column pruning for Generate when Generate.join = true") { - val input = LocalRelation('a.int, 'b.int, 'c.array(StringType)) + test("Fill Generate.unrequiredChildIndex if possible") { + val input = LocalRelation('b.array(StringType)) val query = input - .generate(Explode('c), join = true, outputNames = "explode" :: Nil) - .select('a, 'explode) + .generate(Explode('b), outputNames =
"explode" :: Nil) + .select(('explode + 1).as("result")) .analyze val optimized = Optimize.execute(query) val correctAnswer = input - .select('a, 'c) - .generate(Explode('c), join = true, outputNames = "explode" :: Nil) - .select('a, 'explode) + .generate(Explode('b), unrequiredChildIndex = input.output.zipWithIndex.map(_._2), + outputNames = "explode" :: Nil) + .select(('explode + 1).as("result")) .analyze comparePlans(optimized, correctAnswer) } - test("Turn Generate.join to false if possible") { - val input = LocalRelation('b.array(StringType)) + test("Another fill Generate.unrequiredChildIndex if possible") { + val input = LocalRelation('a.int, 'b.int, 'c1.string, 'c2.string) val query = input - .generate(Explode('b), join = true, outputNames = "explode" :: Nil) - .select(('explode + 1).as("result")) + .generate(Explode(CreateArray(Seq('c1, 'c2))), outputNames = "explode" :: Nil) + .select('a, 'c1, 'explode) .analyze val optimized = Optimize.execute(query) val correctAnswer = input - .generate(Explode('b), join = false, outputNames = "explode" :: Nil) - .select(('explode + 1).as("result")) + .select('a, 'c1, 'c2) + .generate(Explode(CreateArray(Seq('c1, 'c2))), + unrequiredChildIndex = Seq(2), + outputNames = "explode" :: Nil) .analyze comparePlans(optimized, correctAnswer) @@ -246,7 +256,7 @@ class ColumnPruningSuite extends PlanTest { x.select('a) .sortBy(SortOrder('a, Ascending)).analyze - comparePlans(optimized, analysis.EliminateSubqueryAliases(correctAnswer)) + comparePlans(optimized, correctAnswer) // push down invalid val originalQuery1 = { @@ -261,7 +271,7 @@ class ColumnPruningSuite extends PlanTest { .sortBy(SortOrder('a, Ascending)) .select('b).analyze - comparePlans(optimized1, analysis.EliminateSubqueryAliases(correctAnswer1)) + comparePlans(optimized1, correctAnswer1) } test("Column pruning on Window with useless aggregate functions") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineConcatsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineConcatsSuite.scala index 412e199dfaae3..441c15340a778 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineConcatsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineConcatsSuite.scala @@ -22,7 +22,6 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ -import org.apache.spark.sql.types.StringType class CombineConcatsSuite extends PlanTest { @@ -37,8 +36,10 @@ class CombineConcatsSuite extends PlanTest { comparePlans(actual, correctAnswer) } + def str(s: String): Literal = Literal(s) + def binary(s: String): Literal = Literal(s.getBytes) + test("combine nested Concat exprs") { - def str(s: String): Literal = Literal(s, StringType) assertEquivalent( Concat( Concat(str("a") :: str("b") :: Nil) :: @@ -72,4 +73,13 @@ class CombineConcatsSuite extends PlanTest { Nil), Concat(str("a") :: str("b") :: str("c") :: str("d") :: Nil)) } + + test("combine string and binary exprs") { + assertEquivalent( + Concat( + Concat(str("a") :: str("b") :: Nil) :: + Concat(binary("c") :: binary("d") :: Nil) :: + Nil), + Concat(str("a") :: str("b") :: binary("c") :: binary("d") :: Nil)) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala index de0e7c7ee49ac..82a10254d846d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala @@ -94,19 +94,15 @@ class FilterPushdownSuite extends PlanTest { comparePlans(optimized, correctAnswer) } - test("combine redundant deterministic filters") { + test("do not combine non-deterministic filters even if they are identical") { val originalQuery = testRelation .where(Rand(0) > 0.1 && 'a === 1) - .where(Rand(0) > 0.1 && 'a === 1) + .where(Rand(0) > 0.1 && 'a === 1).analyze - val optimized = Optimize.execute(originalQuery.analyze) - val correctAnswer = - testRelation - .where(Rand(0) > 0.1 && 'a === 1 && Rand(0) > 0.1) - .analyze + val optimized = Optimize.execute(originalQuery) - comparePlans(optimized, correctAnswer) + comparePlans(optimized, originalQuery) } test("SPARK-16164: Filter pushdown should keep the ordering in the logical plan") { @@ -508,7 +504,7 @@ class FilterPushdownSuite extends PlanTest { } val optimized = Optimize.execute(originalQuery.analyze) - comparePlans(analysis.EliminateSubqueryAliases(originalQuery.analyze), optimized) + comparePlans(originalQuery.analyze, optimized) } test("joins: conjunctive predicates") { @@ -527,7 +523,7 @@ class FilterPushdownSuite extends PlanTest { left.join(right, condition = Some("x.b".attr === "y.b".attr)) .analyze - comparePlans(optimized, analysis.EliminateSubqueryAliases(correctAnswer)) + comparePlans(optimized, correctAnswer) } test("joins: conjunctive predicates #2") { @@ -546,7 +542,7 @@ class FilterPushdownSuite extends PlanTest { left.join(right, condition = Some("x.b".attr === "y.b".attr)) .analyze - comparePlans(optimized, analysis.EliminateSubqueryAliases(correctAnswer)) + comparePlans(optimized, correctAnswer) } test("joins: conjunctive predicates #3") { @@ -570,7 +566,7 @@ class FilterPushdownSuite extends PlanTest { condition = Some("z.a".attr === "x.b".attr)) .analyze - comparePlans(optimized, analysis.EliminateSubqueryAliases(correctAnswer)) + comparePlans(optimized, correctAnswer) } test("joins: push down where clause into left anti join") { @@ -585,7 +581,7 @@ class FilterPushdownSuite extends PlanTest { x.where("x.a".attr > 10) .join(y, LeftAnti, Some("x.b".attr === "y.b".attr)) .analyze - comparePlans(optimized, analysis.EliminateSubqueryAliases(correctAnswer)) + comparePlans(optimized, correctAnswer) } test("joins: only push down join conditions to the right of a left anti join") { @@ -602,7 +598,7 @@ class FilterPushdownSuite extends PlanTest { LeftAnti, Some("x.b".attr === "y.b".attr && "x.a".attr > 10)) .analyze - comparePlans(optimized, analysis.EliminateSubqueryAliases(correctAnswer)) + comparePlans(optimized, correctAnswer) } test("joins: only push down join conditions to the right of an existence join") { @@ -620,7 +616,7 @@ class FilterPushdownSuite extends PlanTest { ExistenceJoin(fillerVal), Some("x.a".attr > 1)) .analyze - comparePlans(optimized, analysis.EliminateSubqueryAliases(correctAnswer)) + comparePlans(optimized, correctAnswer) } val testRelationWithArrayType = LocalRelation('a.int, 'b.int, 'c_arr.array(IntegerType)) @@ -628,14 +624,14 @@ class FilterPushdownSuite extends PlanTest { test("generate: predicate referenced no generated column") { val originalQuery = { testRelationWithArrayType - .generate(Explode('c_arr), true, false, Some("arr")) + 
.generate(Explode('c_arr), alias = Some("arr")) .where(('b >= 5) && ('a > 6)) } val optimized = Optimize.execute(originalQuery.analyze) val correctAnswer = { testRelationWithArrayType .where(('b >= 5) && ('a > 6)) - .generate(Explode('c_arr), true, false, Some("arr")).analyze + .generate(Explode('c_arr), alias = Some("arr")).analyze } comparePlans(optimized, correctAnswer) @@ -644,14 +640,14 @@ class FilterPushdownSuite extends PlanTest { test("generate: non-deterministic predicate referenced no generated column") { val originalQuery = { testRelationWithArrayType - .generate(Explode('c_arr), true, false, Some("arr")) + .generate(Explode('c_arr), alias = Some("arr")) .where(('b >= 5) && ('a + Rand(10).as("rnd") > 6) && ('col > 6)) } val optimized = Optimize.execute(originalQuery.analyze) val correctAnswer = { testRelationWithArrayType .where('b >= 5) - .generate(Explode('c_arr), true, false, Some("arr")) + .generate(Explode('c_arr), alias = Some("arr")) .where('a + Rand(10).as("rnd") > 6 && 'col > 6) .analyze } @@ -663,14 +659,14 @@ class FilterPushdownSuite extends PlanTest { val generator = Explode('c_arr) val originalQuery = { testRelationWithArrayType - .generate(generator, true, false, Some("arr")) + .generate(generator, alias = Some("arr")) .where(('b >= 5) && ('c > 6)) } val optimized = Optimize.execute(originalQuery.analyze) val referenceResult = { testRelationWithArrayType .where('b >= 5) - .generate(generator, true, false, Some("arr")) + .generate(generator, alias = Some("arr")) .where('c > 6).analyze } @@ -691,7 +687,7 @@ class FilterPushdownSuite extends PlanTest { test("generate: all conjuncts referenced generated column") { val originalQuery = { testRelationWithArrayType - .generate(Explode('c_arr), true, false, Some("arr")) + .generate(Explode('c_arr), alias = Some("arr")) .where(('col > 6) || ('b > 5)).analyze } val optimized = Optimize.execute(originalQuery) @@ -813,6 +809,19 @@ class FilterPushdownSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + test("aggregate: don't push filters if the aggregate has no grouping expressions") { + val originalQuery = LocalRelation.apply(testRelation.output, Seq.empty) + .select('a, 'b) + .groupBy()(count(1)) + .where(false) + + val optimized = Optimize.execute(originalQuery.analyze) + + val correctAnswer = originalQuery.analyze + + comparePlans(optimized, correctAnswer) + } + test("broadcast hint") { val originalQuery = ResolvedHint(testRelation) .where('a === 2L && 'b + Rand(10).as("rnd") === 3) @@ -835,9 +844,9 @@ class FilterPushdownSuite extends PlanTest { val optimized = Optimize.execute(originalQuery.analyze) val correctAnswer = Union(Seq( - testRelation.where('a === 2L), - testRelation2.where('d === 2L))) - .where('b + Rand(10).as("rnd") === 3 && 'c > 5L) + testRelation.where('a === 2L && 'c > 5L), + testRelation2.where('d === 2L && 'f > 5L))) + .where('b + Rand(10).as("rnd") === 3) .analyze comparePlans(optimized, correctAnswer) @@ -1138,12 +1147,13 @@ class FilterPushdownSuite extends PlanTest { val x = testRelation.subquery('x) val y = testRelation.subquery('y) - // Verify that all conditions preceding the first non-deterministic condition are pushed down + // Verify that all conditions except the non-deterministic condition are pushed down // by the optimizer and others are not.
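// [Editor's note] The rewritten expectations in this suite reflect a behavior
// change: the optimizer now pushes down every deterministic conjunct, not just
// the prefix before the first non-deterministic one. A minimal sketch of that
// partitioning idea (illustrative only, not Spark's actual rule):
//
//   import org.apache.spark.sql.catalyst.expressions.Expression
//   def splitForPushdown(conjuncts: Seq[Expression]): (Seq[Expression], Seq[Expression]) =
//     conjuncts.partition(_.deterministic)  // (pushable, kept in place)
//
// In the join test below, `x.a === 5` and `y.a === 5 && y.b === 5` are pushed
// beneath the join, while only `x.a === Rand(10)` stays in the join condition.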
val originalQuery = x.join(y, condition = Some("x.a".attr === 5 && "y.a".attr === 5 && "x.a".attr === Rand(10) && "y.b".attr === 5)) - val correctAnswer = x.where("x.a".attr === 5).join(y.where("y.a".attr === 5), - condition = Some("x.a".attr === Rand(10) && "y.b".attr === 5)) + val correctAnswer = + x.where("x.a".attr === 5).join(y.where("y.a".attr === 5 && "y.b".attr === 5), + condition = Some("x.a".attr === Rand(10))) // CheckAnalysis will ensure nondeterministic expressions not appear in join condition. // TODO support nondeterministic expressions in join condition. @@ -1151,16 +1161,16 @@ class FilterPushdownSuite extends PlanTest { checkAnalysis = false) } - test("watermark pushdown: no pushdown on watermark attribute") { + test("watermark pushdown: no pushdown on watermark attribute #1") { val interval = new CalendarInterval(2, 2000L) - // Verify that all conditions preceding the first watermark touching condition are pushed down + // Verify that all conditions except the watermark touching condition are pushed down // by the optimizer and others are not. val originalQuery = EventTimeWatermark('b, interval, testRelation) .where('a === 5 && 'b === 10 && 'c === 5) val correctAnswer = EventTimeWatermark( - 'b, interval, testRelation.where('a === 5)) - .where('b === 10 && 'c === 5) + 'b, interval, testRelation.where('a === 5 && 'c === 5)) + .where('b === 10) comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze, checkAnalysis = false) @@ -1169,7 +1179,7 @@ class FilterPushdownSuite extends PlanTest { test("watermark pushdown: no pushdown for nondeterministic filter") { val interval = new CalendarInterval(2, 2000L) - // Verify that all conditions preceding the first watermark touching condition are pushed down + // Verify that all conditions except the watermark touching condition are pushed down // by the optimizer and others are not. val originalQuery = EventTimeWatermark('c, interval, testRelation) .where('a === 5 && 'b === Rand(10) && 'c === 5) @@ -1184,7 +1194,7 @@ class FilterPushdownSuite extends PlanTest { test("watermark pushdown: full pushdown") { val interval = new CalendarInterval(2, 2000L) - // Verify that all conditions preceding the first watermark touching condition are pushed down + // Verify that all conditions except the watermark touching condition are pushed down // by the optimizer and others are not. val originalQuery = EventTimeWatermark('c, interval, testRelation) .where('a === 5 && 'b === 10) @@ -1195,15 +1205,15 @@ class FilterPushdownSuite extends PlanTest { checkAnalysis = false) } - test("watermark pushdown: empty pushdown") { + test("watermark pushdown: no pushdown on watermark attribute #2") { val interval = new CalendarInterval(2, 2000L) - // Verify that all conditions preceding the first watermark touching condition are pushed down - // by the optimizer and others are not. 
val originalQuery = EventTimeWatermark('a, interval, testRelation) .where('a === 5 && 'b === 10) + val correctAnswer = EventTimeWatermark( + 'a, interval, testRelation.where('b === 10)).where('a === 5) - comparePlans(Optimize.execute(originalQuery.analyze), originalQuery.analyze, + comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze, checkAnalysis = false) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala index dccb32f0379a8..c28844642aed0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala @@ -147,8 +147,8 @@ class FoldablePropagationSuite extends PlanTest { test("Propagate in expand") { val c1 = Literal(1).as('a) val c2 = Literal(2).as('b) - val a1 = c1.toAttribute.withNullability(true) - val a2 = c2.toAttribute.withNullability(true) + val a1 = c1.toAttribute.newInstance().withNullability(true) + val a2 = c2.toAttribute.newInstance().withNullability(true) val expand = Expand( Seq(Seq(Literal(null), 'b), Seq('a, Literal(null))), Seq(a1, a2), @@ -161,4 +161,23 @@ class FoldablePropagationSuite extends PlanTest { val correctAnswer = correctExpand.where(a1.isNotNull).select(a1, a2).analyze comparePlans(optimized, correctAnswer) } + + test("Propagate above outer join") { + val left = LocalRelation('a.int).select('a, Literal(1).as('b)) + val right = LocalRelation('c.int).select('c, Literal(1).as('d)) + + val join = left.join( + right, + joinType = LeftOuter, + condition = Some('a === 'c && 'b === 'd)) + val query = join.select(('b + 3).as('res)).analyze + val optimized = Optimize.execute(query) + + val correctAnswer = left.join( + right, + joinType = LeftOuter, + condition = Some('a === 'c && Literal(1) === Literal(1))) + .select((Literal(1) + 3).as('res)).analyze + comparePlans(optimized, correctAnswer) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala index 5580f8604ec72..a0708bf7eee9a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala @@ -236,4 +236,17 @@ class InferFiltersFromConstraintsSuite extends PlanTest { comparePlans(optimized, originalQuery) } } + + test("constraints should be inferred from aliased literals") { + val originalLeft = testRelation.subquery('left).as("left") + val optimizedLeft = testRelation.subquery('left).where(IsNotNull('a) && 'a === 2).as("left") + + val right = Project(Seq(Literal(2).as("two")), testRelation.subquery('right)).as("right") + val condition = Some("left.a".attr === "right.two".attr) + + val original = originalLeft.join(right, Inner, condition) + val correct = optimizedLeft.join(right, Inner, condition) + + comparePlans(Optimize.execute(original.analyze), correct.analyze) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala index 97733a754ccc2..ccd9d8dd4d213 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ @@ -118,7 +117,7 @@ class JoinOptimizationSuite extends PlanTest { queryAnswers foreach { queryAnswerPair => val optimized = Optimize.execute(queryAnswerPair._1.analyze) - comparePlans(optimized, analysis.EliminateSubqueryAliases(queryAnswerPair._2.analyze)) + comparePlans(optimized, queryAnswerPair._2.analyze) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala index d7acd139225cd..478118ed709f7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala @@ -169,7 +169,7 @@ class OptimizeInSuite extends PlanTest { val optimizedPlan = OptimizeIn(plan) optimizedPlan match { case Filter(cond, _) - if cond.isInstanceOf[InSet] && cond.asInstanceOf[InSet].getSet().size == 3 => + if cond.isInstanceOf[InSet] && cond.asInstanceOf[InSet].set.size == 3 => // pass case _ => fail("Unexpected result for OptimizedIn") } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelationSuite.scala index bc1c48b99c295..3964508e3a55e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelationSuite.scala @@ -21,8 +21,9 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.plans._ -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.types.StructType @@ -78,17 +79,18 @@ class PropagateEmptyRelationSuite extends PlanTest { (true, false, Inner, Some(LocalRelation('a.int, 'b.int))), (true, false, Cross, Some(LocalRelation('a.int, 'b.int))), - (true, false, LeftOuter, None), + (true, false, LeftOuter, Some(Project(Seq('a, Literal(null).as('b)), testRelation1).analyze)), (true, false, RightOuter, Some(LocalRelation('a.int, 'b.int))), - (true, false, FullOuter, None), - (true, false, LeftAnti, None), - (true, false, LeftSemi, None), + (true, false, FullOuter, Some(Project(Seq('a, Literal(null).as('b)), testRelation1).analyze)), + (true, false, LeftAnti, Some(testRelation1)), + (true, false, LeftSemi, Some(LocalRelation('a.int))), (false, true, Inner, Some(LocalRelation('a.int, 'b.int))), (false, true, Cross, Some(LocalRelation('a.int, 'b.int))), (false, true, LeftOuter, Some(LocalRelation('a.int, 'b.int))), - (false, 
true, RightOuter, None), - (false, true, FullOuter, None), + (false, true, RightOuter, + Some(Project(Seq(Literal(null).as('a), 'b), testRelation2).analyze)), + (false, true, FullOuter, Some(Project(Seq(Literal(null).as('a), 'b), testRelation2).analyze)), (false, true, LeftAnti, Some(LocalRelation('a.int))), (false, true, LeftSemi, Some(LocalRelation('a.int))), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceOperatorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceOperatorSuite.scala index 0fa1aaeb9e164..e9701ffd2c54b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceOperatorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceOperatorSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{Alias, Not} +import org.apache.spark.sql.catalyst.expressions.{Alias, Literal, Not} import org.apache.spark.sql.catalyst.expressions.aggregate.First import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi, PlanTest} import org.apache.spark.sql.catalyst.plans.logical._ @@ -198,6 +198,14 @@ class ReplaceOperatorSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + test("add one grouping key if necessary when replacing Deduplicate with Aggregate") { + val input = LocalRelation() + val query = Deduplicate(Seq.empty, input) // dropDuplicates() + val optimized = Optimize.execute(query.analyze) + val correctAnswer = Aggregate(Seq(Literal(1)), input.output, input) + comparePlans(optimized, correctAnswer) + } + test("don't replace streaming Deduplicate") { val input = LocalRelation(Seq('a.int, 'b.int), isStreaming = true) val attrA = input.output(0) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala new file mode 100644 index 0000000000000..6b3739c372c3a --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteSubquerySuite.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions.ListQuery +import org.apache.spark.sql.catalyst.plans.{LeftSemi, PlanTest} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.rules.RuleExecutor + + +class RewriteSubquerySuite extends PlanTest { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = + Batch("Column Pruning", FixedPoint(100), ColumnPruning) :: + Batch("Rewrite Subquery", FixedPoint(1), + RewritePredicateSubquery, + ColumnPruning, + CollapseProject, + RemoveRedundantProject) :: Nil + } + + test("Column pruning after rewriting predicate subquery") { + val relation = LocalRelation('a.int, 'b.int) + val relInSubquery = LocalRelation('x.int, 'y.int, 'z.int) + + val query = relation.where('a.in(ListQuery(relInSubquery.select('x)))).select('a) + + val optimized = Optimize.execute(query.analyze) + val correctAnswer = relation + .select('a) + .join(relInSubquery.select('x), LeftSemi, Some('a === 'x)) + .analyze + + comparePlans(optimized, correctAnswer) + } + +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala index 3634accf1ec21..0d11958876ce9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Range} import org.apache.spark.sql.catalyst.rules.RuleExecutor @@ -164,6 +165,12 @@ class ComplexTypesSuite extends PlanTest{ comparePlans(Optimizer execute query, expected) } + test("SPARK-22570: CreateArray should not create a lot of global variables") { + val ctx = new CodegenContext + CreateArray(Seq(Literal(1))).genCode(ctx) + assert(ctx.inlinedMutableStates.length == 0) + } + test("simplify map ops") { val rel = relation .select( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index d34a83c42c67e..812bfdd7bb885 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -276,7 +276,7 @@ class PlanParserSuite extends AnalysisTest { assertEqual( "select * from t lateral view explode(x) expl as x", table("t") - .generate(explode, join = true, outer = false, Some("expl"), Seq("x")) + .generate(explode, alias = Some("expl"), outputNames = Seq("x")) .select(star())) // Multiple lateral views @@ -286,12 +286,12 @@ class PlanParserSuite extends AnalysisTest { |lateral view explode(x) expl |lateral view outer json_tuple(x, y) jtup q, z""".stripMargin, table("t") - .generate(explode, join = true, outer = false, Some("expl"), Seq.empty) - .generate(jsonTuple, join = 
true, outer = true, Some("jtup"), Seq("q", "z")) + .generate(explode, alias = Some("expl")) + .generate(jsonTuple, outer = true, alias = Some("jtup"), outputNames = Seq("q", "z")) .select(star())) // Multi-Insert lateral views. - val from = table("t1").generate(explode, join = true, outer = false, Some("expl"), Seq("x")) + val from = table("t1").generate(explode, alias = Some("expl"), outputNames = Seq("x")) assertEqual( """from t1 |lateral view explode(x) expl as x @@ -303,7 +303,7 @@ class PlanParserSuite extends AnalysisTest { |where s < 10 """.stripMargin, Union(from - .generate(jsonTuple, join = true, outer = false, Some("jtup"), Seq("q", "z")) + .generate(jsonTuple, alias = Some("jtup"), outputNames = Seq("q", "z")) .select(star()) .insertInto("t2"), from.where('s < 10).select(star()).insertInto("t3"))) @@ -312,10 +312,8 @@ class PlanParserSuite extends AnalysisTest { val expected = table("t") .generate( UnresolvedGenerator(FunctionIdentifier("posexplode"), Seq('x)), - join = true, - outer = false, - Some("posexpl"), - Seq("x", "y")) + alias = Some("posexpl"), + outputNames = Seq("x", "y")) .select(star()) assertEqual( "select * from t lateral view posexplode(x) posexpl as x, y", diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/LogicalPlanSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/LogicalPlanSuite.scala index cdf912df7c76a..14041747fd20e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/LogicalPlanSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/LogicalPlanSuite.scala @@ -23,8 +23,8 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.types.IntegerType /** - * This suite is used to test [[LogicalPlan]]'s `resolveOperators` and make sure it can correctly - * skips sub-trees that have already been marked as analyzed. + * This suite is used to test [[LogicalPlan]]'s `transformUp/transformDown` plus analysis barrier + * and make sure it can correctly skip sub-trees that have already been analyzed. 
*/ class LogicalPlanSuite extends SparkFunSuite { private var invocationCount = 0 @@ -36,39 +36,53 @@ class LogicalPlanSuite extends SparkFunSuite { private val testRelation = LocalRelation() - test("resolveOperator runs on operators") { + test("transformUp runs on operators") { invocationCount = 0 val plan = Project(Nil, testRelation) - plan resolveOperators function + plan transformUp function assert(invocationCount === 1) + + invocationCount = 0 + plan transformDown function + assert(invocationCount === 1) } - test("resolveOperator runs on operators recursively") { + test("transformUp runs on operators recursively") { invocationCount = 0 val plan = Project(Nil, Project(Nil, testRelation)) - plan resolveOperators function + plan transformUp function assert(invocationCount === 2) + + invocationCount = 0 + plan transformDown function + assert(invocationCount === 2) } - test("resolveOperator skips all ready resolved plans") { + test("transformUp skips already resolved plans wrapped in analysis barrier") { invocationCount = 0 - val plan = Project(Nil, Project(Nil, testRelation)) - plan.foreach(_.setAnalyzed()) - plan resolveOperators function + val plan = AnalysisBarrier(Project(Nil, Project(Nil, testRelation))) + plan transformUp function assert(invocationCount === 0) + + invocationCount = 0 + plan transformDown function + assert(invocationCount === 0) } - test("resolveOperator skips partially resolved plans") { + test("transformUp skips partially resolved plans wrapped in analysis barrier") { invocationCount = 0 - val plan1 = Project(Nil, testRelation) + val plan1 = AnalysisBarrier(Project(Nil, testRelation)) val plan2 = Project(Nil, plan1) - plan1.foreach(_.setAnalyzed()) - plan2 resolveOperators function + plan2 transformUp function assert(invocationCount === 1) + + invocationCount = 0 + plan2 transformDown function + assert(invocationCount === 1) } test("isStreaming") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala index 455037e6c9952..2b1fe987a7960 100755 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala @@ -22,7 +22,7 @@ import java.sql.Date import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.plans.LeftOuter -import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, Filter, Join, Statistics} +import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.types._ @@ -33,7 +33,7 @@ import org.apache.spark.sql.types._ */ class FilterEstimationSuite extends StatsEstimationTestBase { - // Suppose our test table has 10 rows and 6 columns. + // Suppose our test table has 10 rows and 10 columns.
// column cint has values: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 // Hence, distinctCount:10, min:1, max:10, nullCount:0, avgLen:4, maxLen:4 val attrInt = AttributeReference("cint", IntegerType)() @@ -91,6 +91,26 @@ class FilterEstimationSuite extends StatsEstimationTestBase { val colStatInt4 = ColumnStat(distinctCount = 10, min = Some(1), max = Some(10), nullCount = 0, avgLen = 4, maxLen = 4) + // column cintHgm has values: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 with histogram. + // Note that cintHgm has an even distribution with histogram information built. + // Hence, distinctCount:10, min:1, max:10, nullCount:0, avgLen:4, maxLen:4 + val attrIntHgm = AttributeReference("cintHgm", IntegerType)() + val hgmInt = Histogram(2.0, Array(HistogramBin(1.0, 2.0, 2), + HistogramBin(2.0, 4.0, 2), HistogramBin(4.0, 6.0, 2), + HistogramBin(6.0, 8.0, 2), HistogramBin(8.0, 10.0, 2))) + val colStatIntHgm = ColumnStat(distinctCount = 10, min = Some(1), max = Some(10), + nullCount = 0, avgLen = 4, maxLen = 4, histogram = Some(hgmInt)) + + // column cintSkewHgm has values: 1, 4, 4, 5, 5, 5, 5, 6, 6, 10 with histogram. + // Note that cintSkewHgm has a skewed distribution with histogram information built. + // distinctCount:5, min:1, max:10, nullCount:0, avgLen:4, maxLen:4 + val attrIntSkewHgm = AttributeReference("cintSkewHgm", IntegerType)() + val hgmIntSkew = Histogram(2.0, Array(HistogramBin(1.0, 4.0, 2), + HistogramBin(4.0, 5.0, 2), HistogramBin(5.0, 5.0, 1), + HistogramBin(5.0, 6.0, 2), HistogramBin(6.0, 10.0, 2))) + val colStatIntSkewHgm = ColumnStat(distinctCount = 5, min = Some(1), max = Some(10), + nullCount = 0, avgLen = 4, maxLen = 4, histogram = Some(hgmIntSkew)) + val attributeMap = AttributeMap(Seq( attrInt -> colStatInt, attrBool -> colStatBool, @@ -100,7 +120,9 @@ class FilterEstimationSuite extends StatsEstimationTestBase { attrString -> colStatString, attrInt2 -> colStatInt2, attrInt3 -> colStatInt3, - attrInt4 -> colStatInt4 + attrInt4 -> colStatInt4, + attrIntHgm -> colStatIntHgm, + attrIntSkewHgm -> colStatIntSkewHgm )) test("true") { @@ -359,7 +381,7 @@ class FilterEstimationSuite extends StatsEstimationTestBase { test("cbool > false") { validateEstimatedStats( Filter(GreaterThan(attrBool, Literal(false)), childStatsTestPlan(Seq(attrBool), 10L)), - Seq(attrBool -> ColumnStat(distinctCount = 1, min = Some(true), max = Some(true), + Seq(attrBool -> ColumnStat(distinctCount = 1, min = Some(false), max = Some(true), nullCount = 0, avgLen = 1, maxLen = 1)), expectedRowCount = 5) } @@ -578,6 +600,193 @@ class FilterEstimationSuite extends StatsEstimationTestBase { expectedRowCount = 5) } + // The following test cases have histogram information collected for the test column with + // an even distribution + test("Not(cintHgm < 3 AND null)") { + val condition = Not(And(LessThan(attrIntHgm, Literal(3)), Literal(null, IntegerType))) + validateEstimatedStats( + Filter(condition, childStatsTestPlan(Seq(attrIntHgm), 10L)), + Seq(attrIntHgm -> colStatIntHgm.copy(distinctCount = 7)), + expectedRowCount = 7) + } + + test("cintHgm = 5") { + validateEstimatedStats( + Filter(EqualTo(attrIntHgm, Literal(5)), childStatsTestPlan(Seq(attrIntHgm), 10L)), + Seq(attrIntHgm -> ColumnStat(distinctCount = 1, min = Some(5), max = Some(5), + nullCount = 0, avgLen = 4, maxLen = 4, histogram = Some(hgmInt))), + expectedRowCount = 1) + } + + test("cintHgm = 0") { + // This is an out-of-range case since 0 is outside the range [min, max] + validateEstimatedStats( + Filter(EqualTo(attrIntHgm, Literal(0)), childStatsTestPlan(Seq(attrIntHgm), 
10L)), + Nil, + expectedRowCount = 0) + } + + test("cintHgm < 3") { + validateEstimatedStats( + Filter(LessThan(attrIntHgm, Literal(3)), childStatsTestPlan(Seq(attrIntHgm), 10L)), + Seq(attrIntHgm -> ColumnStat(distinctCount = 3, min = Some(1), max = Some(3), + nullCount = 0, avgLen = 4, maxLen = 4, histogram = Some(hgmInt))), + expectedRowCount = 3) + } + + test("cintHgm < 0") { + // This is a corner case since literal 0 is smaller than min. + validateEstimatedStats( + Filter(LessThan(attrIntHgm, Literal(0)), childStatsTestPlan(Seq(attrIntHgm), 10L)), + Nil, + expectedRowCount = 0) + } + + test("cintHgm <= 3") { + validateEstimatedStats( + Filter(LessThanOrEqual(attrIntHgm, Literal(3)), childStatsTestPlan(Seq(attrIntHgm), 10L)), + Seq(attrIntHgm -> ColumnStat(distinctCount = 3, min = Some(1), max = Some(3), + nullCount = 0, avgLen = 4, maxLen = 4, histogram = Some(hgmInt))), + expectedRowCount = 3) + } + + test("cintHgm > 6") { + validateEstimatedStats( + Filter(GreaterThan(attrIntHgm, Literal(6)), childStatsTestPlan(Seq(attrIntHgm), 10L)), + Seq(attrIntHgm -> ColumnStat(distinctCount = 4, min = Some(6), max = Some(10), + nullCount = 0, avgLen = 4, maxLen = 4, histogram = Some(hgmInt))), + expectedRowCount = 4) + } + + test("cintHgm > 10") { + // This is a corner case since max value is 10. + validateEstimatedStats( + Filter(GreaterThan(attrIntHgm, Literal(10)), childStatsTestPlan(Seq(attrIntHgm), 10L)), + Nil, + expectedRowCount = 0) + } + + test("cintHgm >= 6") { + validateEstimatedStats( + Filter(GreaterThanOrEqual(attrIntHgm, Literal(6)), childStatsTestPlan(Seq(attrIntHgm), 10L)), + Seq(attrIntHgm -> ColumnStat(distinctCount = 5, min = Some(6), max = Some(10), + nullCount = 0, avgLen = 4, maxLen = 4, histogram = Some(hgmInt))), + expectedRowCount = 5) + } + + test("cintHgm > 3 AND cintHgm <= 6") { + val condition = And(GreaterThan(attrIntHgm, + Literal(3)), LessThanOrEqual(attrIntHgm, Literal(6))) + validateEstimatedStats( + Filter(condition, childStatsTestPlan(Seq(attrIntHgm), 10L)), + Seq(attrIntHgm -> ColumnStat(distinctCount = 4, min = Some(3), max = Some(6), + nullCount = 0, avgLen = 4, maxLen = 4, histogram = Some(hgmInt))), + expectedRowCount = 4) + } + + test("cintHgm = 3 OR cintHgm = 6") { + val condition = Or(EqualTo(attrIntHgm, Literal(3)), EqualTo(attrIntHgm, Literal(6))) + validateEstimatedStats( + Filter(condition, childStatsTestPlan(Seq(attrIntHgm), 10L)), + Seq(attrIntHgm -> colStatIntHgm.copy(distinctCount = 3)), + expectedRowCount = 3) + } + + // The following test cases have histogram information collected for the test column with + // a skewed distribution. 
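// [Editor's note] Before the skewed cases below, a quick sketch of where these
// row counts come from. With an equi-height histogram every bin holds `height`
// rows, and a range predicate keeps the covered fraction of each bin, assuming
// values are uniform within a bin. This is a simplification of Spark's
// estimator, which also uses per-bin ndv and the column's null count:
object HistogramEstimateSketch {
  final case class Bin(lo: Double, hi: Double, ndv: Long)

  // Estimated number of rows with value strictly below v.
  def rowsBelow(height: Double, bins: Seq[Bin], v: Double): Double =
    bins.map { b =>
      if (v >= b.hi) height                       // bin entirely below v
      else if (v <= b.lo) 0.0                     // bin entirely above v
      else height * (v - b.lo) / (b.hi - b.lo)    // partial overlap
    }.sum
}
// With hgmInt's bins (height 2.0) and v = 3 this yields 2 + 1 + 0 + 0 + 0 = 3
// rows, matching the expectedRowCount asserted for "cintHgm < 3" above.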
+ test("Not(cintSkewHgm < 3 AND null)") { + val condition = Not(And(LessThan(attrIntSkewHgm, Literal(3)), Literal(null, IntegerType))) + validateEstimatedStats( + Filter(condition, childStatsTestPlan(Seq(attrIntSkewHgm), 10L)), + Seq(attrIntSkewHgm -> colStatIntSkewHgm.copy(distinctCount = 5)), + expectedRowCount = 9) + } + + test("cintSkewHgm = 5") { + validateEstimatedStats( + Filter(EqualTo(attrIntSkewHgm, Literal(5)), childStatsTestPlan(Seq(attrIntSkewHgm), 10L)), + Seq(attrIntSkewHgm -> ColumnStat(distinctCount = 1, min = Some(5), max = Some(5), + nullCount = 0, avgLen = 4, maxLen = 4, histogram = Some(hgmIntSkew))), + expectedRowCount = 4) + } + + test("cintSkewHgm = 0") { + // This is an out-of-range case since 0 is outside the range [min, max] + validateEstimatedStats( + Filter(EqualTo(attrIntSkewHgm, Literal(0)), childStatsTestPlan(Seq(attrIntSkewHgm), 10L)), + Nil, + expectedRowCount = 0) + } + + test("cintSkewHgm < 3") { + validateEstimatedStats( + Filter(LessThan(attrIntSkewHgm, Literal(3)), childStatsTestPlan(Seq(attrIntSkewHgm), 10L)), + Seq(attrIntSkewHgm -> ColumnStat(distinctCount = 1, min = Some(1), max = Some(3), + nullCount = 0, avgLen = 4, maxLen = 4, histogram = Some(hgmIntSkew))), + expectedRowCount = 2) + } + + test("cintSkewHgm < 0") { + // This is a corner case since literal 0 is smaller than min. + validateEstimatedStats( + Filter(LessThan(attrIntSkewHgm, Literal(0)), childStatsTestPlan(Seq(attrIntSkewHgm), 10L)), + Nil, + expectedRowCount = 0) + } + + test("cintSkewHgm <= 3") { + validateEstimatedStats( + Filter(LessThanOrEqual(attrIntSkewHgm, Literal(3)), + childStatsTestPlan(Seq(attrIntSkewHgm), 10L)), + Seq(attrIntSkewHgm -> ColumnStat(distinctCount = 1, min = Some(1), max = Some(3), + nullCount = 0, avgLen = 4, maxLen = 4, histogram = Some(hgmIntSkew))), + expectedRowCount = 2) + } + + test("cintSkewHgm > 6") { + validateEstimatedStats( + Filter(GreaterThan(attrIntSkewHgm, Literal(6)), childStatsTestPlan(Seq(attrIntSkewHgm), 10L)), + Seq(attrIntSkewHgm -> ColumnStat(distinctCount = 1, min = Some(6), max = Some(10), + nullCount = 0, avgLen = 4, maxLen = 4, histogram = Some(hgmIntSkew))), + expectedRowCount = 2) + } + + test("cintSkewHgm > 10") { + // This is a corner case since max value is 10. 
+ validateEstimatedStats( + Filter(GreaterThan(attrIntSkewHgm, Literal(10)), + childStatsTestPlan(Seq(attrIntSkewHgm), 10L)), + Nil, + expectedRowCount = 0) + } + + test("cintSkewHgm >= 6") { + validateEstimatedStats( + Filter(GreaterThanOrEqual(attrIntSkewHgm, Literal(6)), + childStatsTestPlan(Seq(attrIntSkewHgm), 10L)), + Seq(attrIntSkewHgm -> ColumnStat(distinctCount = 2, min = Some(6), max = Some(10), + nullCount = 0, avgLen = 4, maxLen = 4, histogram = Some(hgmIntSkew))), + expectedRowCount = 3) + } + + test("cintSkewHgm > 3 AND cintSkewHgm <= 6") { + val condition = And(GreaterThan(attrIntSkewHgm, + Literal(3)), LessThanOrEqual(attrIntSkewHgm, Literal(6))) + validateEstimatedStats( + Filter(condition, childStatsTestPlan(Seq(attrIntSkewHgm), 10L)), + Seq(attrIntSkewHgm -> ColumnStat(distinctCount = 4, min = Some(3), max = Some(6), + nullCount = 0, avgLen = 4, maxLen = 4, histogram = Some(hgmIntSkew))), + expectedRowCount = 8) + } + + test("cintSkewHgm = 3 OR cintSkewHgm = 6") { + val condition = Or(EqualTo(attrIntSkewHgm, Literal(3)), EqualTo(attrIntSkewHgm, Literal(6))) + validateEstimatedStats( + Filter(condition, childStatsTestPlan(Seq(attrIntSkewHgm), 10L)), + Seq(attrIntSkewHgm -> colStatIntSkewHgm.copy(distinctCount = 2)), + expectedRowCount = 3) + } + private def childStatsTestPlan(outList: Seq[Attribute], tableRowCount: BigInt): StatsTestPlan = { StatsTestPlan( outputList = outList, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/JoinEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/JoinEstimationSuite.scala index 097c78eb27fca..26139d85d25fb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/JoinEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/JoinEstimationSuite.scala @@ -23,7 +23,7 @@ import scala.collection.mutable import org.apache.spark.sql.catalyst.expressions.{And, Attribute, AttributeMap, AttributeReference, EqualTo} import org.apache.spark.sql.catalyst.plans._ -import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, Join, Project, Statistics} +import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.types.{DateType, TimestampType, _} @@ -67,6 +67,213 @@ class JoinEstimationSuite extends StatsEstimationTestBase { rowCount = 2, attributeStats = AttributeMap(Seq("key-1-2", "key-2-3").map(nameToColInfo))) + private def estimateByHistogram( + leftHistogram: Histogram, + rightHistogram: Histogram, + expectedMin: Double, + expectedMax: Double, + expectedNdv: Long, + expectedRows: Long): Unit = { + val col1 = attr("key1") + val col2 = attr("key2") + val c1 = generateJoinChild(col1, leftHistogram, expectedMin, expectedMax) + val c2 = generateJoinChild(col2, rightHistogram, expectedMin, expectedMax) + + val c1JoinC2 = Join(c1, c2, Inner, Some(EqualTo(col1, col2))) + val c2JoinC1 = Join(c2, c1, Inner, Some(EqualTo(col2, col1))) + val expectedStatsAfterJoin = Statistics( + sizeInBytes = expectedRows * (8 + 2 * 4), + rowCount = Some(expectedRows), + attributeStats = AttributeMap(Seq( + col1 -> c1.stats.attributeStats(col1).copy( + distinctCount = expectedNdv, min = Some(expectedMin), max = Some(expectedMax)), + col2 -> c2.stats.attributeStats(col2).copy( + distinctCount = expectedNdv, min = Some(expectedMin), max = Some(expectedMax)))) + 
) + + // Join order should not affect estimation result. + Seq(c1JoinC2, c2JoinC1).foreach { join => + assert(join.stats == expectedStatsAfterJoin) + } + } + + private def generateJoinChild( + col: Attribute, + histogram: Histogram, + expectedMin: Double, + expectedMax: Double): LogicalPlan = { + val colStat = inferColumnStat(histogram) + StatsTestPlan( + outputList = Seq(col), + rowCount = (histogram.height * histogram.bins.length).toLong, + attributeStats = AttributeMap(Seq(col -> colStat))) + } + + /** Column statistics should be consistent with histograms in tests. */ + private def inferColumnStat(histogram: Histogram): ColumnStat = { + var ndv = 0L + for (i <- histogram.bins.indices) { + val bin = histogram.bins(i) + if (i == 0 || bin.hi != histogram.bins(i - 1).hi) { + ndv += bin.ndv + } + } + ColumnStat(distinctCount = ndv, min = Some(histogram.bins.head.lo), + max = Some(histogram.bins.last.hi), nullCount = 0, avgLen = 4, maxLen = 4, + histogram = Some(histogram)) + } + + test("equi-height histograms: a bin is contained by another one") { + val histogram1 = Histogram(height = 300, Array( + HistogramBin(lo = 10, hi = 30, ndv = 10), HistogramBin(lo = 30, hi = 60, ndv = 30))) + val histogram2 = Histogram(height = 100, Array( + HistogramBin(lo = 0, hi = 50, ndv = 50), HistogramBin(lo = 50, hi = 100, ndv = 40))) + // test bin trimming + val (t0, h0) = trimBin(histogram2.bins(0), height = 100, lowerBound = 10, upperBound = 60) + assert(t0 == HistogramBin(lo = 10, hi = 50, ndv = 40) && h0 == 80) + val (t1, h1) = trimBin(histogram2.bins(1), height = 100, lowerBound = 10, upperBound = 60) + assert(t1 == HistogramBin(lo = 50, hi = 60, ndv = 8) && h1 == 20) + + val expectedRanges = Seq( + // histogram1.bins(0) overlaps t0 + OverlappedRange(10, 30, 10, 40 * 1 / 2, 300, 80 * 1 / 2), + // histogram1.bins(1) overlaps t0 + OverlappedRange(30, 50, 30 * 2 / 3, 40 * 1 / 2, 300 * 2 / 3, 80 * 1 / 2), + // histogram1.bins(1) overlaps t1 + OverlappedRange(50, 60, 30 * 1 / 3, 8, 300 * 1 / 3, 20) + ) + assert(expectedRanges.equals( + getOverlappedRanges(histogram1, histogram2, lowerBound = 10, upperBound = 60))) + + estimateByHistogram( + leftHistogram = histogram1, + rightHistogram = histogram2, + expectedMin = 10, + expectedMax = 60, + expectedNdv = 10 + 20 + 8, + expectedRows = 300 * 40 / 20 + 200 * 40 / 20 + 100 * 20 / 10) + } + + test("equi-height histograms: a bin has only one value after trimming") { + val histogram1 = Histogram(height = 300, Array( + HistogramBin(lo = 50, hi = 60, ndv = 10), HistogramBin(lo = 60, hi = 75, ndv = 3))) + val histogram2 = Histogram(height = 100, Array( + HistogramBin(lo = 0, hi = 50, ndv = 50), HistogramBin(lo = 50, hi = 100, ndv = 40))) + // test bin trimming + val (t0, h0) = trimBin(histogram2.bins(0), height = 100, lowerBound = 50, upperBound = 75) + assert(t0 == HistogramBin(lo = 50, hi = 50, ndv = 1) && h0 == 2) + val (t1, h1) = trimBin(histogram2.bins(1), height = 100, lowerBound = 50, upperBound = 75) + assert(t1 == HistogramBin(lo = 50, hi = 75, ndv = 20) && h1 == 50) + + val expectedRanges = Seq( + // histogram1.bins(0) overlaps t0 + OverlappedRange(50, 50, 1, 1, 300 / 10, 2), + // histogram1.bins(0) overlaps t1 + OverlappedRange(50, 60, 10, 20 * 10 / 25, 300, 50 * 10 / 25), + // histogram1.bins(1) overlaps t1 + OverlappedRange(60, 75, 3, 20 * 15 / 25, 300, 50 * 15 / 25) + ) + assert(expectedRanges.equals( + getOverlappedRanges(histogram1, histogram2, lowerBound = 50, upperBound = 75))) + + estimateByHistogram( + leftHistogram = histogram1, + rightHistogram = 
histogram2, + expectedMin = 50, + expectedMax = 75, + expectedNdv = 1 + 8 + 3, + expectedRows = 30 * 2 / 1 + 300 * 20 / 10 + 300 * 30 / 12) + } + + test("equi-height histograms: skew distribution (some bins have only one value)") { + val histogram1 = Histogram(height = 300, Array( + HistogramBin(lo = 30, hi = 30, ndv = 1), + HistogramBin(lo = 30, hi = 30, ndv = 1), + HistogramBin(lo = 30, hi = 60, ndv = 30))) + val histogram2 = Histogram(height = 100, Array( + HistogramBin(lo = 0, hi = 50, ndv = 50), HistogramBin(lo = 50, hi = 100, ndv = 40))) + // test bin trimming + val (t0, h0) = trimBin(histogram2.bins(0), height = 100, lowerBound = 30, upperBound = 60) + assert(t0 == HistogramBin(lo = 30, hi = 50, ndv = 20) && h0 == 40) + val (t1, h1) = trimBin(histogram2.bins(1), height = 100, lowerBound = 30, upperBound = 60) + assert(t1 == HistogramBin(lo = 50, hi = 60, ndv = 8) && h1 == 20) + + val expectedRanges = Seq( + OverlappedRange(30, 30, 1, 1, 300, 40 / 20), + OverlappedRange(30, 30, 1, 1, 300, 40 / 20), + OverlappedRange(30, 50, 30 * 2 / 3, 20, 300 * 2 / 3, 40), + OverlappedRange(50, 60, 30 * 1 / 3, 8, 300 * 1 / 3, 20) + ) + assert(expectedRanges.equals( + getOverlappedRanges(histogram1, histogram2, lowerBound = 30, upperBound = 60))) + + estimateByHistogram( + leftHistogram = histogram1, + rightHistogram = histogram2, + expectedMin = 30, + expectedMax = 60, + expectedNdv = 1 + 20 + 8, + expectedRows = 300 * 2 / 1 + 300 * 2 / 1 + 200 * 40 / 20 + 100 * 20 / 10) + } + + test("equi-height histograms: skew distribution (histograms have different skewed values)") { + val histogram1 = Histogram(height = 300, Array( + HistogramBin(lo = 30, hi = 30, ndv = 1), HistogramBin(lo = 30, hi = 60, ndv = 30))) + val histogram2 = Histogram(height = 100, Array( + HistogramBin(lo = 0, hi = 50, ndv = 50), HistogramBin(lo = 50, hi = 50, ndv = 1))) + // test bin trimming + val (t0, h0) = trimBin(histogram1.bins(1), height = 300, lowerBound = 30, upperBound = 50) + assert(t0 == HistogramBin(lo = 30, hi = 50, ndv = 20) && h0 == 200) + val (t1, h1) = trimBin(histogram2.bins(0), height = 100, lowerBound = 30, upperBound = 50) + assert(t1 == HistogramBin(lo = 30, hi = 50, ndv = 20) && h1 == 40) + + val expectedRanges = Seq( + OverlappedRange(30, 30, 1, 1, 300, 40 / 20), + OverlappedRange(30, 50, 20, 20, 200, 40), + OverlappedRange(50, 50, 1, 1, 200 / 20, 100) + ) + assert(expectedRanges.equals( + getOverlappedRanges(histogram1, histogram2, lowerBound = 30, upperBound = 50))) + + estimateByHistogram( + leftHistogram = histogram1, + rightHistogram = histogram2, + expectedMin = 30, + expectedMax = 50, + expectedNdv = 1 + 20, + expectedRows = 300 * 2 / 1 + 200 * 40 / 20 + 10 * 100 / 1) + } + + test("equi-height histograms: skew distribution (both histograms have the same skewed value)") { + val histogram1 = Histogram(height = 300, Array( + HistogramBin(lo = 30, hi = 30, ndv = 1), HistogramBin(lo = 30, hi = 60, ndv = 30))) + val histogram2 = Histogram(height = 150, Array( + HistogramBin(lo = 0, hi = 30, ndv = 30), HistogramBin(lo = 30, hi = 30, ndv = 1))) + // test bin trimming + val (t0, h0) = trimBin(histogram1.bins(1), height = 300, lowerBound = 30, upperBound = 30) + assert(t0 == HistogramBin(lo = 30, hi = 30, ndv = 1) && h0 == 10) + val (t1, h1) = trimBin(histogram2.bins(0), height = 150, lowerBound = 30, upperBound = 30) + assert(t1 == HistogramBin(lo = 30, hi = 30, ndv = 1) && h1 == 5) + + val expectedRanges = Seq( + OverlappedRange(30, 30, 1, 1, 300, 5), + OverlappedRange(30, 30, 1, 1, 300, 150),
OverlappedRange(30, 30, 1, 1, 10, 5), + OverlappedRange(30, 30, 1, 1, 10, 150) + ) + assert(expectedRanges.equals( + getOverlappedRanges(histogram1, histogram2, lowerBound = 30, upperBound = 30))) + + estimateByHistogram( + leftHistogram = histogram1, + rightHistogram = histogram2, + expectedMin = 30, + expectedMax = 30, + // only one value: 30 + expectedNdv = 1, + expectedRows = 300 * 5 / 1 + 300 * 150 / 1 + 10 * 5 / 1 + 10 * 150 / 1) + } + test("cross join") { // table1 (key-1-5 int, key-5-9 int): (1, 9), (2, 8), (3, 7), (4, 6), (5, 5) // table2 (key-1-2 int, key-2-4 int): (1, 2), (2, 3), (2, 4) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index c8cf16d937352..625ff38943fa3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -563,6 +563,76 @@ class DateTimeUtilsSuite extends SparkFunSuite { } } + test("truncTimestamp") { + def testTrunc( + level: Int, + expected: String, + inputTS: SQLTimestamp, + timezone: TimeZone = DateTimeUtils.defaultTimeZone()): Unit = { + val truncated = + DateTimeUtils.truncTimestamp(inputTS, level, timezone) + val expectedTS = + DateTimeUtils.stringToTimestamp(UTF8String.fromString(expected)) + assert(truncated === expectedTS.get) + } + + val defaultInputTS = + DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-05T09:32:05.359")) + val defaultInputTS1 = + DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-31T20:32:05.359")) + val defaultInputTS2 = + DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-04-01T02:32:05.359")) + val defaultInputTS3 = + DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-30T02:32:05.359")) + val defaultInputTS4 = + DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-29T02:32:05.359")) + + testTrunc(DateTimeUtils.TRUNC_TO_YEAR, "2015-01-01T00:00:00", defaultInputTS.get) + testTrunc(DateTimeUtils.TRUNC_TO_MONTH, "2015-03-01T00:00:00", defaultInputTS.get) + testTrunc(DateTimeUtils.TRUNC_TO_DAY, "2015-03-05T00:00:00", defaultInputTS.get) + testTrunc(DateTimeUtils.TRUNC_TO_HOUR, "2015-03-05T09:00:00", defaultInputTS.get) + testTrunc(DateTimeUtils.TRUNC_TO_MINUTE, "2015-03-05T09:32:00", defaultInputTS.get) + testTrunc(DateTimeUtils.TRUNC_TO_SECOND, "2015-03-05T09:32:05", defaultInputTS.get) + testTrunc(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-02T00:00:00", defaultInputTS.get) + testTrunc(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", defaultInputTS1.get) + testTrunc(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", defaultInputTS2.get) + testTrunc(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", defaultInputTS3.get) + testTrunc(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-23T00:00:00", defaultInputTS4.get) + testTrunc(DateTimeUtils.TRUNC_TO_QUARTER, "2015-01-01T00:00:00", defaultInputTS.get) + testTrunc(DateTimeUtils.TRUNC_TO_QUARTER, "2015-01-01T00:00:00", defaultInputTS1.get) + testTrunc(DateTimeUtils.TRUNC_TO_QUARTER, "2015-04-01T00:00:00", defaultInputTS2.get) + + for (tz <- DateTimeTestUtils.ALL_TIMEZONES) { + DateTimeTestUtils.withDefaultTimeZone(tz) { + val inputTS = + DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-05T09:32:05.359")) + val inputTS1 = + DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-31T20:32:05.359")) + val inputTS2 = + 
DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-04-01T02:32:05.359")) + val inputTS3 = + DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-30T02:32:05.359")) + val inputTS4 = + DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-29T02:32:05.359")) + + testTrunc(DateTimeUtils.TRUNC_TO_YEAR, "2015-01-01T00:00:00", inputTS.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_MONTH, "2015-03-01T00:00:00", inputTS.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_DAY, "2015-03-05T00:00:00", inputTS.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_HOUR, "2015-03-05T09:00:00", inputTS.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_MINUTE, "2015-03-05T09:32:00", inputTS.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_SECOND, "2015-03-05T09:32:05", inputTS.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-02T00:00:00", inputTS.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", inputTS1.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", inputTS2.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", inputTS3.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-23T00:00:00", inputTS4.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_QUARTER, "2015-01-01T00:00:00", inputTS.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_QUARTER, "2015-01-01T00:00:00", inputTS1.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_QUARTER, "2015-04-01T00:00:00", inputTS2.get, tz) + } + } + } + test("daysToMillis and millisToDays") { val c = Calendar.getInstance(TimeZonePST) diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 4db3fea008ee9..ef41837f89d68 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.3.0-SNAPSHOT + 2.4.0-SNAPSHOT ../../pom.xml @@ -38,7 +38,7 @@ com.univocity univocity-parsers - 2.5.4 + 2.5.9 jar @@ -195,7 +195,7 @@ org.scalatest scalatest-maven-plugin - -ea -Xmx4g -Xss4m -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m + -ea -Xmx4g -Xss4m -XX:ReservedCodeCacheSize=${CodeCacheSize} diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java new file mode 100644 index 0000000000000..b6e792274da11 --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java @@ -0,0 +1,251 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.orc; + +import java.math.BigDecimal; + +import org.apache.orc.storage.ql.exec.vector.*; + +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.Decimal; +import org.apache.spark.sql.types.TimestampType; +import org.apache.spark.unsafe.types.UTF8String; + +/** + * A column vector class wrapping Hive's ColumnVector. Because Spark ColumnarBatch only accepts + * Spark's vectorized.ColumnVector, this column vector is used to adapt Hive's ColumnVector to + * Spark's ColumnVector. + */ +public class OrcColumnVector extends org.apache.spark.sql.vectorized.ColumnVector { + private ColumnVector baseData; + private LongColumnVector longData; + private DoubleColumnVector doubleData; + private BytesColumnVector bytesData; + private DecimalColumnVector decimalData; + private TimestampColumnVector timestampData; + private final boolean isTimestamp; + + private int batchSize; + + OrcColumnVector(DataType type, ColumnVector vector) { + super(type); + + if (type instanceof TimestampType) { + isTimestamp = true; + } else { + isTimestamp = false; + } + + baseData = vector; + if (vector instanceof LongColumnVector) { + longData = (LongColumnVector) vector; + } else if (vector instanceof DoubleColumnVector) { + doubleData = (DoubleColumnVector) vector; + } else if (vector instanceof BytesColumnVector) { + bytesData = (BytesColumnVector) vector; + } else if (vector instanceof DecimalColumnVector) { + decimalData = (DecimalColumnVector) vector; + } else if (vector instanceof TimestampColumnVector) { + timestampData = (TimestampColumnVector) vector; + } else { + throw new UnsupportedOperationException(); + } + } + + public void setBatchSize(int batchSize) { + this.batchSize = batchSize; + } + + @Override + public void close() { + + } + + @Override + public int numNulls() { + if (baseData.isRepeating) { + if (baseData.isNull[0]) { + return batchSize; + } else { + return 0; + } + } else if (baseData.noNulls) { + return 0; + } else { + int count = 0; + for (int i = 0; i < batchSize; i++) { + if (baseData.isNull[i]) count++; + } + return count; + } + } + + /* A helper method to get the row index in a column. */ + private int getRowIndex(int rowId) { + return baseData.isRepeating ?
0 : rowId; + } + + @Override + public boolean isNullAt(int rowId) { + return baseData.isNull[getRowIndex(rowId)]; + } + + @Override + public boolean getBoolean(int rowId) { + return longData.vector[getRowIndex(rowId)] == 1; + } + + @Override + public boolean[] getBooleans(int rowId, int count) { + boolean[] res = new boolean[count]; + for (int i = 0; i < count; i++) { + res[i] = getBoolean(rowId + i); + } + return res; + } + + @Override + public byte getByte(int rowId) { + return (byte) longData.vector[getRowIndex(rowId)]; + } + + @Override + public byte[] getBytes(int rowId, int count) { + byte[] res = new byte[count]; + for (int i = 0; i < count; i++) { + res[i] = getByte(rowId + i); + } + return res; + } + + @Override + public short getShort(int rowId) { + return (short) longData.vector[getRowIndex(rowId)]; + } + + @Override + public short[] getShorts(int rowId, int count) { + short[] res = new short[count]; + for (int i = 0; i < count; i++) { + res[i] = getShort(rowId + i); + } + return res; + } + + @Override + public int getInt(int rowId) { + return (int) longData.vector[getRowIndex(rowId)]; + } + + @Override + public int[] getInts(int rowId, int count) { + int[] res = new int[count]; + for (int i = 0; i < count; i++) { + res[i] = getInt(rowId + i); + } + return res; + } + + @Override + public long getLong(int rowId) { + int index = getRowIndex(rowId); + if (isTimestamp) { + return timestampData.time[index] * 1000 + timestampData.nanos[index] / 1000; + } else { + return longData.vector[index]; + } + } + + @Override + public long[] getLongs(int rowId, int count) { + long[] res = new long[count]; + for (int i = 0; i < count; i++) { + res[i] = getLong(rowId + i); + } + return res; + } + + @Override + public float getFloat(int rowId) { + return (float) doubleData.vector[getRowIndex(rowId)]; + } + + @Override + public float[] getFloats(int rowId, int count) { + float[] res = new float[count]; + for (int i = 0; i < count; i++) { + res[i] = getFloat(rowId + i); + } + return res; + } + + @Override + public double getDouble(int rowId) { + return doubleData.vector[getRowIndex(rowId)]; + } + + @Override + public double[] getDoubles(int rowId, int count) { + double[] res = new double[count]; + for (int i = 0; i < count; i++) { + res[i] = getDouble(rowId + i); + } + return res; + } + + @Override + public int getArrayLength(int rowId) { + throw new UnsupportedOperationException(); + } + + @Override + public int getArrayOffset(int rowId) { + throw new UnsupportedOperationException(); + } + + @Override + public Decimal getDecimal(int rowId, int precision, int scale) { + BigDecimal data = decimalData.vector[getRowIndex(rowId)].getHiveDecimal().bigDecimalValue(); + return Decimal.apply(data, precision, scale); + } + + @Override + public UTF8String getUTF8String(int rowId) { + int index = getRowIndex(rowId); + BytesColumnVector col = bytesData; + return UTF8String.fromBytes(col.vector[index], col.start[index], col.length[index]); + } + + @Override + public byte[] getBinary(int rowId) { + int index = getRowIndex(rowId); + byte[] binary = new byte[bytesData.length[index]]; + System.arraycopy(bytesData.vector[index], bytesData.start[index], binary, 0, binary.length); + return binary; + } + + @Override + public org.apache.spark.sql.vectorized.ColumnVector arrayData() { + throw new UnsupportedOperationException(); + } + + @Override + public org.apache.spark.sql.vectorized.ColumnVector getChildColumn(int ordinal) { + throw new UnsupportedOperationException(); + } +} diff --git 
a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReader.java new file mode 100644 index 0000000000000..36fdf2bdf84d2 --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReader.java @@ -0,0 +1,571 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.orc; + +import java.io.IOException; +import java.util.stream.IntStream; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.lib.input.FileSplit; +import org.apache.orc.OrcConf; +import org.apache.orc.OrcFile; +import org.apache.orc.Reader; +import org.apache.orc.TypeDescription; +import org.apache.orc.mapred.OrcInputFormat; +import org.apache.orc.storage.common.type.HiveDecimal; +import org.apache.orc.storage.ql.exec.vector.*; +import org.apache.orc.storage.serde2.io.HiveDecimalWritable; + +import org.apache.spark.memory.MemoryMode; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.execution.vectorized.ColumnVectorUtils; +import org.apache.spark.sql.execution.vectorized.OffHeapColumnVector; +import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector; +import org.apache.spark.sql.execution.vectorized.WritableColumnVector; +import org.apache.spark.sql.types.*; +import org.apache.spark.sql.vectorized.ColumnarBatch; + + +/** + * To support vectorization in WholeStageCodeGen, this reader returns ColumnarBatch. + * After creation, `initialize` and `initBatch` should be called sequentially. + */ +public class OrcColumnarBatchReader extends RecordReader<Void, ColumnarBatch> { + + /** + * The default batch size. We use this value for the ORC reader to make it consistent with + * Spark's columnar batch, because their default batch sizes differ, as follows: + * + * - ORC's VectorizedRowBatch.DEFAULT_SIZE = 1024 + * - Spark's ColumnarBatch.DEFAULT_BATCH_SIZE = 4 * 1024 + */ + private static final int DEFAULT_SIZE = 4 * 1024; + + // ORC File Reader + private Reader reader; + + // Vectorized ORC Row Batch + private VectorizedRowBatch batch; + + /** + * The column IDs of the physical ORC file schema which are required by this reader. + * -1 means this required column doesn't exist in the ORC file. + */ + private int[] requestedColIds; + + // Record reader from ORC row batch. + private org.apache.orc.RecordReader recordReader; + + private StructField[] requiredFields; + + // The result columnar batch for vectorized execution by whole-stage codegen.
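+  // It is produced by the call sequence sketched below (the lower-case argument
+  // names are placeholders assumed to be in scope; they are not defined by this
+  // patch):
+  //
+  //   OrcColumnarBatchReader reader =
+  //     new OrcColumnarBatchReader(/* useOffHeap */ false, /* copyToSpark */ true);
+  //   reader.initialize(fileSplit, taskAttemptContext);
+  //   reader.initBatch(orcSchema, requestedColIds, requiredFields,
+  //     partitionSchema, partitionValues);
+  //   while (reader.nextKeyValue()) {
+  //     ColumnarBatch batch = reader.getCurrentValue();
+  //     // ... consume batch.numRows() rows ...
+  //   }
+  //   reader.close();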
+ private ColumnarBatch columnarBatch; + + // Writable column vectors of the result columnar batch. + private WritableColumnVector[] columnVectors; + + // The wrapped ORC column vectors. It should be null if `copyToSpark` is true. + private org.apache.spark.sql.vectorized.ColumnVector[] orcVectorWrappers; + + // The memory mode of the columnarBatch + private final MemoryMode MEMORY_MODE; + + // Whether or not to copy the ORC columnar batch to a Spark columnar batch. + private final boolean copyToSpark; + + public OrcColumnarBatchReader(boolean useOffHeap, boolean copyToSpark) { + MEMORY_MODE = useOffHeap ? MemoryMode.OFF_HEAP : MemoryMode.ON_HEAP; + this.copyToSpark = copyToSpark; + } + + + @Override + public Void getCurrentKey() throws IOException, InterruptedException { + return null; + } + + @Override + public ColumnarBatch getCurrentValue() throws IOException, InterruptedException { + return columnarBatch; + } + + @Override + public float getProgress() throws IOException, InterruptedException { + return recordReader.getProgress(); + } + + @Override + public boolean nextKeyValue() throws IOException, InterruptedException { + return nextBatch(); + } + + @Override + public void close() throws IOException { + if (columnarBatch != null) { + columnarBatch.close(); + columnarBatch = null; + } + if (recordReader != null) { + recordReader.close(); + recordReader = null; + } + } + + /** + * Initialize ORC file reader and batch record reader. + * Please note that `initBatch` needs to be called after this. + */ + @Override + public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) + throws IOException, InterruptedException { + FileSplit fileSplit = (FileSplit)inputSplit; + Configuration conf = taskAttemptContext.getConfiguration(); + reader = OrcFile.createReader( + fileSplit.getPath(), + OrcFile.readerOptions(conf) + .maxLength(OrcConf.MAX_FILE_LENGTH.getLong(conf)) + .filesystem(fileSplit.getPath().getFileSystem(conf))); + + Reader.Options options = + OrcInputFormat.buildOptions(conf, reader, fileSplit.getStart(), fileSplit.getLength()); + recordReader = reader.rows(options); + } + + /** + * Initialize the columnar batch by setting the required schema and partition information. + * With this information, this creates a ColumnarBatch with the full schema. + */ + public void initBatch( + TypeDescription orcSchema, + int[] requestedColIds, + StructField[] requiredFields, + StructType partitionSchema, + InternalRow partitionValues) { + batch = orcSchema.createRowBatch(DEFAULT_SIZE); + assert(!batch.selectedInUse); // `selectedInUse` should be initialized with `false`. + + this.requiredFields = requiredFields; + this.requestedColIds = requestedColIds; + assert(requiredFields.length == requestedColIds.length); + + StructType resultSchema = new StructType(requiredFields); + for (StructField f : partitionSchema.fields()) { + resultSchema = resultSchema.add(f); + } + + int capacity = DEFAULT_SIZE; + + if (copyToSpark) { + if (MEMORY_MODE == MemoryMode.OFF_HEAP) { + columnVectors = OffHeapColumnVector.allocateColumns(capacity, resultSchema); + } else { + columnVectors = OnHeapColumnVector.allocateColumns(capacity, resultSchema); + } + + // Initialize the missing columns once.
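+      // A required column that is absent from the file (requestedColIds[i] == -1)
+      // is modeled as an all-null vector marked constant, so the null flags are
+      // written once here and kept for every subsequent batch: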
+ for (int i = 0; i < requiredFields.length; i++) { + if (requestedColIds[i] == -1) { + columnVectors[i].putNulls(0, capacity); + columnVectors[i].setIsConstant(); + } + } + + if (partitionValues.numFields() > 0) { + int partitionIdx = requiredFields.length; + for (int i = 0; i < partitionValues.numFields(); i++) { + ColumnVectorUtils.populate(columnVectors[i + partitionIdx], partitionValues, i); + columnVectors[i + partitionIdx].setIsConstant(); + } + } + + columnarBatch = new ColumnarBatch(resultSchema, columnVectors, capacity); + } else { + // Just wrap the ORC column vector instead of copying it to a Spark column vector. + orcVectorWrappers = new org.apache.spark.sql.vectorized.ColumnVector[resultSchema.length()]; + + for (int i = 0; i < requiredFields.length; i++) { + DataType dt = requiredFields[i].dataType(); + int colId = requestedColIds[i]; + // Initialize the missing columns once. + if (colId == -1) { + OnHeapColumnVector missingCol = new OnHeapColumnVector(capacity, dt); + missingCol.putNulls(0, capacity); + missingCol.setIsConstant(); + orcVectorWrappers[i] = missingCol; + } else { + orcVectorWrappers[i] = new OrcColumnVector(dt, batch.cols[colId]); + } + } + + if (partitionValues.numFields() > 0) { + int partitionIdx = requiredFields.length; + for (int i = 0; i < partitionValues.numFields(); i++) { + DataType dt = partitionSchema.fields()[i].dataType(); + OnHeapColumnVector partitionCol = new OnHeapColumnVector(capacity, dt); + ColumnVectorUtils.populate(partitionCol, partitionValues, i); + partitionCol.setIsConstant(); + orcVectorWrappers[partitionIdx + i] = partitionCol; + } + } + + columnarBatch = new ColumnarBatch(resultSchema, orcVectorWrappers, capacity); + } + } + + /** + * Return true if more data exists in the next batch. If so, prepare the next batch + * by copying from ORC VectorizedRowBatch columns to Spark ColumnarBatch columns.
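+   * In the wrapper mode (`copyToSpark` is false) no copy is performed: the ORC
+   * vectors are exposed directly through OrcColumnVector and only the batch
+   * size is propagated to the wrappers.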
+ */ + private boolean nextBatch() throws IOException { + recordReader.nextBatch(batch); + int batchSize = batch.size; + if (batchSize == 0) { + return false; + } + columnarBatch.setNumRows(batchSize); + + if (!copyToSpark) { + for (int i = 0; i < requiredFields.length; i++) { + if (requestedColIds[i] != -1) { + ((OrcColumnVector) orcVectorWrappers[i]).setBatchSize(batchSize); + } + } + return true; + } + + for (WritableColumnVector vector : columnVectors) { + vector.reset(); + } + + for (int i = 0; i < requiredFields.length; i++) { + StructField field = requiredFields[i]; + WritableColumnVector toColumn = columnVectors[i]; + + if (requestedColIds[i] >= 0) { + ColumnVector fromColumn = batch.cols[requestedColIds[i]]; + + if (fromColumn.isRepeating) { + putRepeatingValues(batchSize, field, fromColumn, toColumn); + } else if (fromColumn.noNulls) { + putNonNullValues(batchSize, field, fromColumn, toColumn); + } else { + putValues(batchSize, field, fromColumn, toColumn); + } + } + } + return true; + } + + private void putRepeatingValues( + int batchSize, + StructField field, + ColumnVector fromColumn, + WritableColumnVector toColumn) { + if (fromColumn.isNull[0]) { + toColumn.putNulls(0, batchSize); + } else { + DataType type = field.dataType(); + if (type instanceof BooleanType) { + toColumn.putBooleans(0, batchSize, ((LongColumnVector)fromColumn).vector[0] == 1); + } else if (type instanceof ByteType) { + toColumn.putBytes(0, batchSize, (byte)((LongColumnVector)fromColumn).vector[0]); + } else if (type instanceof ShortType) { + toColumn.putShorts(0, batchSize, (short)((LongColumnVector)fromColumn).vector[0]); + } else if (type instanceof IntegerType || type instanceof DateType) { + toColumn.putInts(0, batchSize, (int)((LongColumnVector)fromColumn).vector[0]); + } else if (type instanceof LongType) { + toColumn.putLongs(0, batchSize, ((LongColumnVector)fromColumn).vector[0]); + } else if (type instanceof TimestampType) { + toColumn.putLongs(0, batchSize, + fromTimestampColumnVector((TimestampColumnVector)fromColumn, 0)); + } else if (type instanceof FloatType) { + toColumn.putFloats(0, batchSize, (float)((DoubleColumnVector)fromColumn).vector[0]); + } else if (type instanceof DoubleType) { + toColumn.putDoubles(0, batchSize, ((DoubleColumnVector)fromColumn).vector[0]); + } else if (type instanceof StringType || type instanceof BinaryType) { + BytesColumnVector data = (BytesColumnVector)fromColumn; + WritableColumnVector arrayData = toColumn.getChildColumn(0); + int size = data.vector[0].length; + arrayData.reserve(size); + arrayData.putBytes(0, size, data.vector[0], 0); + for (int index = 0; index < batchSize; index++) { + toColumn.putArray(index, 0, size); + } + } else if (type instanceof DecimalType) { + DecimalType decimalType = (DecimalType)type; + putDecimalWritables( + toColumn, + batchSize, + decimalType.precision(), + decimalType.scale(), + ((DecimalColumnVector)fromColumn).vector[0]); + } else { + throw new UnsupportedOperationException("Unsupported Data Type: " + type); + } + } + } + + private void putNonNullValues( + int batchSize, + StructField field, + ColumnVector fromColumn, + WritableColumnVector toColumn) { + DataType type = field.dataType(); + if (type instanceof BooleanType) { + long[] data = ((LongColumnVector)fromColumn).vector; + for (int index = 0; index < batchSize; index++) { + toColumn.putBoolean(index, data[index] == 1); + } + } else if (type instanceof ByteType) { + long[] data = ((LongColumnVector)fromColumn).vector; + for (int index = 0; index < batchSize; 
index++) { + toColumn.putByte(index, (byte)data[index]); + } + } else if (type instanceof ShortType) { + long[] data = ((LongColumnVector)fromColumn).vector; + for (int index = 0; index < batchSize; index++) { + toColumn.putShort(index, (short)data[index]); + } + } else if (type instanceof IntegerType || type instanceof DateType) { + long[] data = ((LongColumnVector)fromColumn).vector; + for (int index = 0; index < batchSize; index++) { + toColumn.putInt(index, (int)data[index]); + } + } else if (type instanceof LongType) { + toColumn.putLongs(0, batchSize, ((LongColumnVector)fromColumn).vector, 0); + } else if (type instanceof TimestampType) { + TimestampColumnVector data = ((TimestampColumnVector)fromColumn); + for (int index = 0; index < batchSize; index++) { + toColumn.putLong(index, fromTimestampColumnVector(data, index)); + } + } else if (type instanceof FloatType) { + double[] data = ((DoubleColumnVector)fromColumn).vector; + for (int index = 0; index < batchSize; index++) { + toColumn.putFloat(index, (float)data[index]); + } + } else if (type instanceof DoubleType) { + toColumn.putDoubles(0, batchSize, ((DoubleColumnVector)fromColumn).vector, 0); + } else if (type instanceof StringType || type instanceof BinaryType) { + BytesColumnVector data = ((BytesColumnVector)fromColumn); + WritableColumnVector arrayData = toColumn.getChildColumn(0); + int totalNumBytes = IntStream.of(data.length).sum(); + arrayData.reserve(totalNumBytes); + for (int index = 0, pos = 0; index < batchSize; pos += data.length[index], index++) { + arrayData.putBytes(pos, data.length[index], data.vector[index], data.start[index]); + toColumn.putArray(index, pos, data.length[index]); + } + } else if (type instanceof DecimalType) { + DecimalType decimalType = (DecimalType)type; + DecimalColumnVector data = ((DecimalColumnVector)fromColumn); + if (decimalType.precision() > Decimal.MAX_LONG_DIGITS()) { + WritableColumnVector arrayData = toColumn.getChildColumn(0); + arrayData.reserve(batchSize * 16); + } + for (int index = 0; index < batchSize; index++) { + putDecimalWritable( + toColumn, + index, + decimalType.precision(), + decimalType.scale(), + data.vector[index]); + } + } else { + throw new UnsupportedOperationException("Unsupported Data Type: " + type); + } + } + + private void putValues( + int batchSize, + StructField field, + ColumnVector fromColumn, + WritableColumnVector toColumn) { + DataType type = field.dataType(); + if (type instanceof BooleanType) { + long[] vector = ((LongColumnVector)fromColumn).vector; + for (int index = 0; index < batchSize; index++) { + if (fromColumn.isNull[index]) { + toColumn.putNull(index); + } else { + toColumn.putBoolean(index, vector[index] == 1); + } + } + } else if (type instanceof ByteType) { + long[] vector = ((LongColumnVector)fromColumn).vector; + for (int index = 0; index < batchSize; index++) { + if (fromColumn.isNull[index]) { + toColumn.putNull(index); + } else { + toColumn.putByte(index, (byte)vector[index]); + } + } + } else if (type instanceof ShortType) { + long[] vector = ((LongColumnVector)fromColumn).vector; + for (int index = 0; index < batchSize; index++) { + if (fromColumn.isNull[index]) { + toColumn.putNull(index); + } else { + toColumn.putShort(index, (short)vector[index]); + } + } + } else if (type instanceof IntegerType || type instanceof DateType) { + long[] vector = ((LongColumnVector)fromColumn).vector; + for (int index = 0; index < batchSize; index++) { + if (fromColumn.isNull[index]) { + toColumn.putNull(index); + } else { + 
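+            // ORC stores integral types (and dates, as days since the epoch) in
+            // a LongColumnVector, so the value is narrowed to the Spark type here: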
toColumn.putInt(index, (int)vector[index]); + } + } + } else if (type instanceof LongType) { + long[] vector = ((LongColumnVector)fromColumn).vector; + for (int index = 0; index < batchSize; index++) { + if (fromColumn.isNull[index]) { + toColumn.putNull(index); + } else { + toColumn.putLong(index, vector[index]); + } + } + } else if (type instanceof TimestampType) { + TimestampColumnVector vector = ((TimestampColumnVector)fromColumn); + for (int index = 0; index < batchSize; index++) { + if (fromColumn.isNull[index]) { + toColumn.putNull(index); + } else { + toColumn.putLong(index, fromTimestampColumnVector(vector, index)); + } + } + } else if (type instanceof FloatType) { + double[] vector = ((DoubleColumnVector)fromColumn).vector; + for (int index = 0; index < batchSize; index++) { + if (fromColumn.isNull[index]) { + toColumn.putNull(index); + } else { + toColumn.putFloat(index, (float)vector[index]); + } + } + } else if (type instanceof DoubleType) { + double[] vector = ((DoubleColumnVector)fromColumn).vector; + for (int index = 0; index < batchSize; index++) { + if (fromColumn.isNull[index]) { + toColumn.putNull(index); + } else { + toColumn.putDouble(index, vector[index]); + } + } + } else if (type instanceof StringType || type instanceof BinaryType) { + BytesColumnVector vector = (BytesColumnVector)fromColumn; + WritableColumnVector arrayData = toColumn.getChildColumn(0); + int totalNumBytes = IntStream.of(vector.length).sum(); + arrayData.reserve(totalNumBytes); + for (int index = 0, pos = 0; index < batchSize; pos += vector.length[index], index++) { + if (fromColumn.isNull[index]) { + toColumn.putNull(index); + } else { + arrayData.putBytes(pos, vector.length[index], vector.vector[index], vector.start[index]); + toColumn.putArray(index, pos, vector.length[index]); + } + } + } else if (type instanceof DecimalType) { + DecimalType decimalType = (DecimalType)type; + HiveDecimalWritable[] vector = ((DecimalColumnVector)fromColumn).vector; + if (decimalType.precision() > Decimal.MAX_LONG_DIGITS()) { + WritableColumnVector arrayData = toColumn.getChildColumn(0); + arrayData.reserve(batchSize * 16); + } + for (int index = 0; index < batchSize; index++) { + if (fromColumn.isNull[index]) { + toColumn.putNull(index); + } else { + putDecimalWritable( + toColumn, + index, + decimalType.precision(), + decimalType.scale(), + vector[index]); + } + } + } else { + throw new UnsupportedOperationException("Unsupported Data Type: " + type); + } + } + + /** + * Returns the number of micros since epoch from an element of TimestampColumnVector. + */ + private static long fromTimestampColumnVector(TimestampColumnVector vector, int index) { + return vector.time[index] * 1000L + vector.nanos[index] / 1000L; + } + + /** + * Put a `HiveDecimalWritable` to a `WritableColumnVector`. 
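+   * Spark keeps a decimal in the narrowest representation its precision allows:
+   * an int when precision <= Decimal.MAX_INT_DIGITS() (9), a long when
+   * precision <= Decimal.MAX_LONG_DIGITS() (18), and otherwise the unscaled
+   * two's-complement bytes written at a fixed 16-byte stride. As a worked
+   * example, the DECIMAL(12, 2) value 1234.56 takes the long path and is
+   * stored as the unscaled long 123456.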
+ */ + private static void putDecimalWritable( + WritableColumnVector toColumn, + int index, + int precision, + int scale, + HiveDecimalWritable decimalWritable) { + HiveDecimal decimal = decimalWritable.getHiveDecimal(); + Decimal value = + Decimal.apply(decimal.bigDecimalValue(), decimal.precision(), decimal.scale()); + value.changePrecision(precision, scale); + + if (precision <= Decimal.MAX_INT_DIGITS()) { + toColumn.putInt(index, (int) value.toUnscaledLong()); + } else if (precision <= Decimal.MAX_LONG_DIGITS()) { + toColumn.putLong(index, value.toUnscaledLong()); + } else { + byte[] bytes = value.toJavaBigDecimal().unscaledValue().toByteArray(); + WritableColumnVector arrayData = toColumn.getChildColumn(0); + arrayData.putBytes(index * 16, bytes.length, bytes, 0); + toColumn.putArray(index, index * 16, bytes.length); + } + } + + /** + * Put `HiveDecimalWritable`s to a `WritableColumnVector`. + */ + private static void putDecimalWritables( + WritableColumnVector toColumn, + int size, + int precision, + int scale, + HiveDecimalWritable decimalWritable) { + HiveDecimal decimal = decimalWritable.getHiveDecimal(); + Decimal value = + Decimal.apply(decimal.bigDecimalValue(), decimal.precision(), decimal.scale()); + value.changePrecision(precision, scale); + + if (precision <= Decimal.MAX_INT_DIGITS()) { + toColumn.putInts(0, size, (int) value.toUnscaledLong()); + } else if (precision <= Decimal.MAX_LONG_DIGITS()) { + toColumn.putLongs(0, size, value.toUnscaledLong()); + } else { + byte[] bytes = value.toJavaBigDecimal().unscaledValue().toByteArray(); + WritableColumnVector arrayData = toColumn.getChildColumn(0); + arrayData.reserve(bytes.length); + arrayData.putBytes(0, bytes.length, bytes, 0); + for (int index = 0; index < size; index++) { + toColumn.putArray(index, 0, bytes.length); + } + } + } +} diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java index 0f9903360e0d8..e511d3da7e160 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java @@ -22,6 +22,7 @@ import static org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase.createRLEIterator; import java.io.IOException; +import java.util.TimeZone; import org.apache.parquet.bytes.BytesUtils; import org.apache.parquet.column.ColumnDescriptor; import org.apache.parquet.column.Dictionary; @@ -36,7 +37,6 @@ import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType; import org.apache.spark.sql.catalyst.util.DateTimeUtils; -import org.apache.spark.sql.execution.vectorized.ColumnVector; import org.apache.spark.sql.execution.vectorized.WritableColumnVector; import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.DecimalType; @@ -95,13 +95,18 @@ public class VectorizedColumnReader { private final PageReader pageReader; private final ColumnDescriptor descriptor; private final OriginalType originalType; + // The timezone conversion to apply to int96 timestamps. Null if no conversion. 
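+  // Some writers (e.g. Hive and Impala) persist INT96 timestamps as local
+  // wall-clock time, while Spark keeps microseconds relative to UTC. A sketch
+  // of the adjustment performed below, where `rawMicros` is a placeholder for
+  // a decoded INT96 value:
+  //
+  //   long utcMicros = DateTimeUtils.convertTz(rawMicros, convertTz, UTC);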
+ private final TimeZone convertTz; + private static final TimeZone UTC = DateTimeUtils.TimeZoneUTC(); public VectorizedColumnReader( ColumnDescriptor descriptor, OriginalType originalType, - PageReader pageReader) throws IOException { + PageReader pageReader, + TimeZone convertTz) throws IOException { this.descriptor = descriptor; this.pageReader = pageReader; + this.convertTz = convertTz; this.originalType = originalType; this.maxDefLevel = descriptor.getMaxDefinitionLevel(); @@ -224,6 +229,10 @@ void readBatch(int total, WritableColumnVector column) throws IOException { } } + private boolean shouldConvertTimestamps() { + return convertTz != null && !convertTz.equals(UTC); + } + /** * Reads `num` values into column, decoding the values from `dictionaryIds` and `dictionary`. */ @@ -231,7 +240,7 @@ private void decodeDictionaryIds( int rowId, int num, WritableColumnVector column, - ColumnVector dictionaryIds) { + WritableColumnVector dictionaryIds) { switch (descriptor.getType()) { case INT32: if (column.dataType() == DataTypes.IntegerType || @@ -296,11 +305,21 @@ private void decodeDictionaryIds( break; case INT96: if (column.dataType() == DataTypes.TimestampType) { - for (int i = rowId; i < rowId + num; ++i) { - // TODO: Convert dictionary of Binaries to dictionary of Longs - if (!column.isNullAt(i)) { - Binary v = dictionary.decodeToBinary(dictionaryIds.getDictId(i)); - column.putLong(i, ParquetRowConverter.binaryToSQLTimestamp(v)); + if (!shouldConvertTimestamps()) { + for (int i = rowId; i < rowId + num; ++i) { + if (!column.isNullAt(i)) { + Binary v = dictionary.decodeToBinary(dictionaryIds.getDictId(i)); + column.putLong(i, ParquetRowConverter.binaryToSQLTimestamp(v)); + } + } + } else { + for (int i = rowId; i < rowId + num; ++i) { + if (!column.isNullAt(i)) { + Binary v = dictionary.decodeToBinary(dictionaryIds.getDictId(i)); + long rawTime = ParquetRowConverter.binaryToSQLTimestamp(v); + long adjTime = DateTimeUtils.convertTz(rawTime, convertTz, UTC); + column.putLong(i, adjTime); + } } } } else { @@ -427,16 +446,29 @@ private void readBinaryBatch(int rowId, int num, WritableColumnVector column) { // This is where we implement support for the valid type conversions. 
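+    // An INT96 timestamp is 12 bytes: 8 bytes of nanoseconds within the day
+    // followed by 4 bytes of Julian day number; ParquetRowConverter.binaryToSQLTimestamp
+    // collapses the pair into microseconds since the epoch before any timezone
+    // adjustment is applied.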
// TODO: implement remaining type conversions VectorizedValuesReader data = (VectorizedValuesReader) dataColumn; - if (column.isArray()) { + if (column.dataType() == DataTypes.StringType || column.dataType() == DataTypes.BinaryType) { defColumn.readBinarys(num, column, rowId, maxDefLevel, data); } else if (column.dataType() == DataTypes.TimestampType) { - for (int i = 0; i < num; i++) { - if (defColumn.readInteger() == maxDefLevel) { - column.putLong(rowId + i, - // Read 12 bytes for INT96 - ParquetRowConverter.binaryToSQLTimestamp(data.readBinary(12))); - } else { - column.putNull(rowId + i); + if (!shouldConvertTimestamps()) { + for (int i = 0; i < num; i++) { + if (defColumn.readInteger() == maxDefLevel) { + // Read 12 bytes for INT96 + long rawTime = ParquetRowConverter.binaryToSQLTimestamp(data.readBinary(12)); + column.putLong(rowId + i, rawTime); + } else { + column.putNull(rowId + i); + } + } + } else { + for (int i = 0; i < num; i++) { + if (defColumn.readInteger() == maxDefLevel) { + // Read 12 bytes for INT96 + long rawTime = ParquetRowConverter.binaryToSQLTimestamp(data.readBinary(12)); + long adjTime = DateTimeUtils.convertTz(rawTime, convertTz, UTC); + column.putLong(rowId + i, adjTime); + } else { + column.putNull(rowId + i); + } } } } else { diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java index 669d71e60779d..cd745b1f0e4e3 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.Arrays; import java.util.List; +import java.util.TimeZone; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.TaskAttemptContext; @@ -30,10 +31,10 @@ import org.apache.spark.memory.MemoryMode; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.execution.vectorized.ColumnVectorUtils; -import org.apache.spark.sql.execution.vectorized.ColumnarBatch; import org.apache.spark.sql.execution.vectorized.WritableColumnVector; import org.apache.spark.sql.execution.vectorized.OffHeapColumnVector; import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector; +import org.apache.spark.sql.vectorized.ColumnarBatch; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; @@ -77,6 +78,12 @@ public class VectorizedParquetRecordReader extends SpecificParquetRecordReaderBa */ private boolean[] missingColumns; + /** + * The timezone that timestamp INT96 values should be converted to. Null if no conversion. Here to + * work around incompatibilities between different engines when writing timestamp values. + */ + private TimeZone convertTz = null; + /** * columnBatch object that is used for batch decoding. This is created on first use and triggers * batched decoding. It is not valid to interleave calls to the batched interface with the row @@ -105,10 +112,15 @@ public class VectorizedParquetRecordReader extends SpecificParquetRecordReaderBa */ private final MemoryMode MEMORY_MODE; - public VectorizedParquetRecordReader(boolean useOffHeap) { + public VectorizedParquetRecordReader(TimeZone convertTz, boolean useOffHeap) { + this.convertTz = convertTz; MEMORY_MODE = useOffHeap ?
MemoryMode.OFF_HEAP : MemoryMode.ON_HEAP; } + public VectorizedParquetRecordReader(boolean useOffHeap) { + this(null, useOffHeap); + } + /** * Implementation of RecordReader API. */ @@ -236,7 +248,10 @@ public void enableReturningBatches() { * Advances to the next batch of rows. Returns false if there are no more. */ public boolean nextBatch() throws IOException { - columnarBatch.reset(); + for (WritableColumnVector vector : columnVectors) { + vector.reset(); + } + columnarBatch.setNumRows(0); if (rowsReturned >= totalRowCount) return false; checkEndOfRowGroup(); @@ -291,8 +306,8 @@ private void checkEndOfRowGroup() throws IOException { columnReaders = new VectorizedColumnReader[columns.size()]; for (int i = 0; i < columns.size(); ++i) { if (missingColumns[i]) continue; - columnReaders[i] = new VectorizedColumnReader( - columns.get(i), types.get(i).getOriginalType(), pages.getPageReader(columns.get(i))); + columnReaders[i] = new VectorizedColumnReader(columns.get(i), types.get(i).getOriginalType(), + pages.getPageReader(columns.get(i)), convertTz); } totalCountLoadedSoFar += pages.getRowCount(); } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/AggregateHashMap.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/AggregateHashMap.java index 9467435435d1f..24260b05194a7 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/AggregateHashMap.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/AggregateHashMap.java @@ -41,7 +41,7 @@ public class AggregateHashMap { private OnHeapColumnVector[] columnVectors; - private ColumnarBatch batch; + private MutableColumnarRow aggBufferRow; private int[] buckets; private int numBuckets; private int numRows = 0; @@ -63,7 +63,7 @@ public AggregateHashMap(StructType schema, int capacity, double loadFactor, int this.maxSteps = maxSteps; numBuckets = (int) (capacity / loadFactor); columnVectors = OnHeapColumnVector.allocateColumns(capacity, schema); - batch = new ColumnarBatch(schema, columnVectors, capacity); + aggBufferRow = new MutableColumnarRow(columnVectors); buckets = new int[numBuckets]; Arrays.fill(buckets, -1); } @@ -72,14 +72,15 @@ public AggregateHashMap(StructType schema) { this(schema, DEFAULT_CAPACITY, DEFAULT_LOAD_FACTOR, DEFAULT_MAX_STEPS); } - public ColumnarRow findOrInsert(long key) { + public MutableColumnarRow findOrInsert(long key) { int idx = find(key); if (idx != -1 && buckets[idx] == -1) { columnVectors[0].putLong(numRows, key); columnVectors[1].putLong(numRows, 0); buckets[idx] = numRows++; } - return batch.getRow(buckets[idx]); + aggBufferRow.rowId = buckets[idx]; + return aggBufferRow; } @VisibleForTesting diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java index b4b5f0a265934..b5cbe8e2839ba 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java @@ -28,6 +28,8 @@ import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.catalyst.util.DateTimeUtils; import org.apache.spark.sql.types.*; +import org.apache.spark.sql.vectorized.ColumnarArray; +import org.apache.spark.sql.vectorized.ColumnarBatch; import org.apache.spark.unsafe.types.CalendarInterval; import org.apache.spark.unsafe.types.UTF8String; @@ -98,21 +100,13 @@ public static void 
populate(WritableColumnVector col, InternalRow row, int field * For example, an array of IntegerType will return an int[]. * Throws exceptions for unhandled schemas. */ - public static Object toPrimitiveJavaArray(ColumnarArray array) { - DataType dt = array.data.dataType(); - if (dt instanceof IntegerType) { - int[] result = new int[array.length]; - ColumnVector data = array.data; - for (int i = 0; i < result.length; i++) { - if (data.isNullAt(array.offset + i)) { - throw new RuntimeException("Cannot handle NULL values."); - } - result[i] = data.getInt(array.offset + i); + public static int[] toJavaIntArray(ColumnarArray array) { + for (int i = 0; i < array.numElements(); i++) { + if (array.isNullAt(i)) { + throw new RuntimeException("Cannot handle NULL values."); } - return result; - } else { - throw new UnsupportedOperationException(); } + return array.toIntArray(); } private static void appendValue(WritableColumnVector dst, DataType t, Object o) { diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarRow.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/MutableColumnarRow.java similarity index 79% rename from sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarRow.java rename to sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/MutableColumnarRow.java index 98a907322713b..70057a9def6c0 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarRow.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/MutableColumnarRow.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.apache.spark.sql.execution.vectorized; import java.math.BigDecimal; @@ -22,37 +23,38 @@ import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; import org.apache.spark.sql.catalyst.util.MapData; import org.apache.spark.sql.types.*; +import org.apache.spark.sql.vectorized.ColumnarArray; +import org.apache.spark.sql.vectorized.ColumnarBatch; +import org.apache.spark.sql.vectorized.ColumnarRow; +import org.apache.spark.sql.vectorized.ColumnVector; import org.apache.spark.unsafe.types.CalendarInterval; import org.apache.spark.unsafe.types.UTF8String; /** - * Row abstraction in {@link ColumnVector}. The instance of this class is intended - * to be reused, callers should copy the data out if it needs to be stored. + * A mutable version of {@link ColumnarRow}, which is used in the vectorized hash map for hash + * aggregate, and {@link ColumnarBatch} to save object creation. + * + * Note that this class intentionally has a lot of duplicated code with {@link ColumnarRow}, to + * avoid java polymorphism overhead by keeping {@link ColumnarRow} and this class final classes. */ -public final class ColumnarRow extends InternalRow { - protected int rowId; +public final class MutableColumnarRow extends InternalRow { + public int rowId; private final ColumnVector[] columns; private final WritableColumnVector[] writableColumns; - // Ctor used if this is a struct. 
- ColumnarRow(ColumnVector[] columns) { + public MutableColumnarRow(ColumnVector[] columns) { this.columns = columns; - this.writableColumns = new WritableColumnVector[this.columns.length]; - for (int i = 0; i < this.columns.length; i++) { - if (this.columns[i] instanceof WritableColumnVector) { - this.writableColumns[i] = (WritableColumnVector) this.columns[i]; - } - } + this.writableColumns = null; } - public ColumnVector[] columns() { return columns; } + public MutableColumnarRow(WritableColumnVector[] writableColumns) { + this.columns = writableColumns; + this.writableColumns = writableColumns; + } @Override public int numFields() { return columns.length; } - /** - * Revisit this. This is expensive. This is currently only used in test paths. - */ @Override public InternalRow copy() { GenericInternalRow row = new GenericInternalRow(columns.length); @@ -224,8 +226,8 @@ public void update(int ordinal, Object value) { setDouble(ordinal, (double) value); } else if (dt instanceof DecimalType) { DecimalType t = (DecimalType) dt; - setDecimal(ordinal, Decimal.apply((BigDecimal) value, t.precision(), t.scale()), - t.precision()); + Decimal d = Decimal.apply((BigDecimal) value, t.precision(), t.scale()); + setDecimal(ordinal, d, t.precision()); } else { throw new UnsupportedOperationException("Datatype not supported " + dt); } @@ -234,68 +236,54 @@ public void update(int ordinal, Object value) { @Override public void setNullAt(int ordinal) { - getWritableColumn(ordinal).putNull(rowId); + writableColumns[ordinal].putNull(rowId); } @Override public void setBoolean(int ordinal, boolean value) { - WritableColumnVector column = getWritableColumn(ordinal); - column.putNotNull(rowId); - column.putBoolean(rowId, value); + writableColumns[ordinal].putNotNull(rowId); + writableColumns[ordinal].putBoolean(rowId, value); } @Override public void setByte(int ordinal, byte value) { - WritableColumnVector column = getWritableColumn(ordinal); - column.putNotNull(rowId); - column.putByte(rowId, value); + writableColumns[ordinal].putNotNull(rowId); + writableColumns[ordinal].putByte(rowId, value); } @Override public void setShort(int ordinal, short value) { - WritableColumnVector column = getWritableColumn(ordinal); - column.putNotNull(rowId); - column.putShort(rowId, value); + writableColumns[ordinal].putNotNull(rowId); + writableColumns[ordinal].putShort(rowId, value); } @Override public void setInt(int ordinal, int value) { - WritableColumnVector column = getWritableColumn(ordinal); - column.putNotNull(rowId); - column.putInt(rowId, value); + writableColumns[ordinal].putNotNull(rowId); + writableColumns[ordinal].putInt(rowId, value); } @Override public void setLong(int ordinal, long value) { - WritableColumnVector column = getWritableColumn(ordinal); - column.putNotNull(rowId); - column.putLong(rowId, value); + writableColumns[ordinal].putNotNull(rowId); + writableColumns[ordinal].putLong(rowId, value); } @Override public void setFloat(int ordinal, float value) { - WritableColumnVector column = getWritableColumn(ordinal); - column.putNotNull(rowId); - column.putFloat(rowId, value); + writableColumns[ordinal].putNotNull(rowId); + writableColumns[ordinal].putFloat(rowId, value); } @Override public void setDouble(int ordinal, double value) { - WritableColumnVector column = getWritableColumn(ordinal); - column.putNotNull(rowId); - column.putDouble(rowId, value); + writableColumns[ordinal].putNotNull(rowId); + writableColumns[ordinal].putDouble(rowId, value); } @Override public void setDecimal(int ordinal, Decimal 
value, int precision) { - WritableColumnVector column = getWritableColumn(ordinal); - column.putNotNull(rowId); - column.putDecimal(rowId, value, precision); - } - - private WritableColumnVector getWritableColumn(int ordinal) { - WritableColumnVector column = writableColumns[ordinal]; - assert (!column.isConstant); - return column; + writableColumns[ordinal].putNotNull(rowId); + writableColumns[ordinal].putDecimal(rowId, value, precision); } } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java index 1cbaf08569334..1c45b846790b6 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java @@ -110,7 +110,6 @@ public void putNotNull(int rowId) { public void putNull(int rowId) { Platform.putByte(null, nulls + rowId, (byte) 1); ++numNulls; - anyNullsSet = true; } @Override @@ -119,13 +118,12 @@ public void putNulls(int rowId, int count) { for (int i = 0; i < count; ++i, ++offset) { Platform.putByte(null, offset, (byte) 1); } - anyNullsSet = true; numNulls += count; } @Override public void putNotNulls(int rowId, int count) { - if (!anyNullsSet) return; + if (numNulls == 0) return; long offset = nulls + rowId; for (int i = 0; i < count; ++i, ++offset) { Platform.putByte(null, offset, (byte) 0); @@ -532,7 +530,7 @@ public int putByteArray(int rowId, byte[] value, int offset, int length) { @Override protected void reserveInternal(int newCapacity) { int oldCapacity = (nulls == 0L) ? 0 : capacity; - if (this.resultArray != null) { + if (isArray()) { this.lengthData = Platform.reallocateMemory(lengthData, oldCapacity * 4, newCapacity * 4); this.offsetData = @@ -547,7 +545,7 @@ protected void reserveInternal(int newCapacity) { } else if (type instanceof LongType || type instanceof DoubleType || DecimalType.is64BitDecimalType(type) || type instanceof TimestampType) { this.data = Platform.reallocateMemory(data, oldCapacity * 8, newCapacity * 8); - } else if (resultStruct != null) { + } else if (childColumns != null) { // Nothing to store. } else { throw new RuntimeException("Unhandled " + type); diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java index 85d72295ab9b8..1d538fe4181b7 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java @@ -107,7 +107,6 @@ public void putNotNull(int rowId) { public void putNull(int rowId) { nulls[rowId] = (byte)1; ++numNulls; - anyNullsSet = true; } @Override @@ -115,13 +114,12 @@ public void putNulls(int rowId, int count) { for (int i = 0; i < count; ++i) { nulls[rowId + i] = (byte)1; } - anyNullsSet = true; numNulls += count; } @Override public void putNotNulls(int rowId, int count) { - if (!anyNullsSet) return; + if (numNulls == 0) return; for (int i = 0; i < count; ++i) { nulls[rowId + i] = (byte)0; } @@ -505,7 +503,7 @@ public int putByteArray(int rowId, byte[] value, int offset, int length) { // Spilt this function out since it is the slow path. 
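+  // The reallocation below now keys off isArray(), which consolidates the old
+  // `resultArray != null` and byte-array-decimal checks: string, binary, array
+  // and wide-decimal columns all grow their lengths/offsets buffers here.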
@Override protected void reserveInternal(int newCapacity) { - if (this.resultArray != null || DecimalType.isByteArrayDecimalType(type)) { + if (isArray()) { int[] newLengths = new int[newCapacity]; int[] newOffsets = new int[newCapacity]; if (this.arrayLengths != null) { @@ -558,7 +556,7 @@ protected void reserveInternal(int newCapacity) { if (doubleData != null) System.arraycopy(doubleData, 0, newData, 0, capacity); doubleData = newData; } - } else if (resultStruct != null) { + } else if (childColumns != null) { // Nothing to store. } else { throw new RuntimeException("Unhandled " + type); diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java index e7653f0c00b9a..d2ae32b06f83b 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java @@ -23,6 +23,7 @@ import org.apache.spark.sql.internal.SQLConf; import org.apache.spark.sql.types.*; +import org.apache.spark.sql.vectorized.ColumnVector; import org.apache.spark.unsafe.array.ByteArrayMethods; import org.apache.spark.unsafe.types.UTF8String; @@ -36,8 +37,10 @@ * elements. This means that the put() APIs do not check as in common cases (i.e. flat schemas), * the lengths are known up front. * - * A ColumnVector should be considered immutable once originally created. In other words, it is not - * valid to call put APIs after reads until reset() is called. + * A WritableColumnVector should be considered immutable once originally created. In other words, + * it is not valid to call put APIs after reads until reset() is called. + + * WritableColumnVectors are intended to be reused. */ public abstract class WritableColumnVector extends ColumnVector { @@ -52,12 +55,11 @@ public void reset() { ((WritableColumnVector) c).reset(); } } - numNulls = 0; elementsAppended = 0; - if (anyNullsSet) { + if (numNulls > 0) { putNotNulls(0, capacity); - anyNullsSet = false; } + numNulls = 0; } @Override @@ -74,8 +76,6 @@ public void close() { dictionaryIds = null; } dictionary = null; - resultStruct = null; - resultArray = null; } public void reserve(int requiredCapacity) { @@ -104,8 +104,57 @@ private void throwUnsupportedException(int requiredCapacity, Throwable cause) { @Override public int numNulls() { return numNulls; } - @Override - public boolean anyNullsSet() { return anyNullsSet; } + /** + * Returns the dictionary Id for rowId. + * + * This should only be called when this `WritableColumnVector` represents dictionaryIds. + * We have this separate method for dictionaryIds as per SPARK-16928. + */ + public abstract int getDictId(int rowId); + + /** + * The Dictionary for this column. + * + * If it's not null, will be used to decode the value in getXXX(). + */ + protected Dictionary dictionary; + + /** + * Reusable column for ids of dictionary. + */ + protected WritableColumnVector dictionaryIds; + + /** + * Returns true if this column has a dictionary. + */ + public boolean hasDictionary() { return this.dictionary != null; } + + /** + * Returns the underlying integer column for ids of dictionary. + */ + public WritableColumnVector getDictionaryIds() { + return dictionaryIds; + } + + /** + * Update the dictionary. + */ + public void setDictionary(Dictionary dictionary) { + this.dictionary = dictionary; + } + + /** + * Reserve an integer column for ids of dictionary.
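+   * A sketch of the intended decode flow (as used by the Parquet reader): call
+   * reserveDictionaryIds(total) once per batch, fill the returned vector with
+   * dictionary ids, then materialize values with
+   * dictionary.decodeToBinary(dictionaryIds.getDictId(rowId)).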
+ */ + public WritableColumnVector reserveDictionaryIds(int capacity) { + if (dictionaryIds == null) { + dictionaryIds = reserveNewColumn(capacity, DataTypes.IntegerType); + } else { + dictionaryIds.reset(); + dictionaryIds.reserve(capacity); + } + return dictionaryIds; + } /** * Ensures that there is enough storage to store capacity elements. That is, the put() APIs @@ -537,11 +586,11 @@ public final int appendArray(int length) { public final int appendStruct(boolean isNull) { if (isNull) { appendNull(); - for (ColumnVector c: childColumns) { + for (WritableColumnVector c: childColumns) { if (c.type instanceof StructType) { - ((WritableColumnVector) c).appendStruct(true); + c.appendStruct(true); } else { - ((WritableColumnVector) c).appendNull(); + c.appendNull(); } } } else { @@ -588,12 +637,6 @@ public final int appendStruct(boolean isNull) { */ protected int numNulls; - /** - * True if there is at least one NULL byte set. This is an optimization for the writer, to skip - * having to clear NULL bits. - */ - protected boolean anyNullsSet; - /** * True if this column's values are fixed. This means the column values never change, even * across resets. @@ -615,41 +658,16 @@ public final int appendStruct(boolean isNull) { */ protected WritableColumnVector[] childColumns; - /** - * Update the dictionary. - */ - public void setDictionary(Dictionary dictionary) { - this.dictionary = dictionary; - } - - /** - * Reserve a integer column for ids of dictionary. - */ - public WritableColumnVector reserveDictionaryIds(int capacity) { - WritableColumnVector dictionaryIds = (WritableColumnVector) this.dictionaryIds; - if (dictionaryIds == null) { - dictionaryIds = reserveNewColumn(capacity, DataTypes.IntegerType); - this.dictionaryIds = dictionaryIds; - } else { - dictionaryIds.reset(); - dictionaryIds.reserve(capacity); - } - return dictionaryIds; - } - - /** - * Returns the underlying integer column for ids of dictionary. - */ - @Override - public WritableColumnVector getDictionaryIds() { - return (WritableColumnVector) dictionaryIds; - } - /** * Reserve a new column. */ protected abstract WritableColumnVector reserveNewColumn(int capacity, DataType type); + protected boolean isArray() { + return type instanceof ArrayType || type instanceof BinaryType || type instanceof StringType || + DecimalType.isByteArrayDecimalType(type); + } + /** * Sets up the common state and also handles creating the child columns if this is a nested * type. @@ -658,8 +676,7 @@ protected WritableColumnVector(int capacity, DataType type) { super(type); this.capacity = capacity; - if (type instanceof ArrayType || type instanceof BinaryType || type instanceof StringType - || DecimalType.isByteArrayDecimalType(type)) { + if (isArray()) { DataType childType; int childCapacity = capacity; if (type instanceof ArrayType) { @@ -670,27 +687,19 @@ protected WritableColumnVector(int capacity, DataType type) { } this.childColumns = new WritableColumnVector[1]; this.childColumns[0] = reserveNewColumn(childCapacity, childType); - this.resultArray = new ColumnarArray(this.childColumns[0]); - this.resultStruct = null; } else if (type instanceof StructType) { StructType st = (StructType)type; this.childColumns = new WritableColumnVector[st.fields().length]; for (int i = 0; i < childColumns.length; ++i) { this.childColumns[i] = reserveNewColumn(capacity, st.fields()[i].dataType()); } - this.resultArray = null; - this.resultStruct = new ColumnarRow(this.childColumns); } else if (type instanceof CalendarIntervalType) { // Two columns. 
Months as int. Microseconds as Long. this.childColumns = new WritableColumnVector[2]; this.childColumns[0] = reserveNewColumn(capacity, DataTypes.IntegerType); this.childColumns[1] = reserveNewColumn(capacity, DataTypes.LongType); - this.resultArray = null; - this.resultStruct = new ColumnarRow(this.childColumns); } else { this.childColumns = null; - this.resultArray = null; - this.resultStruct = null; } } } diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/DataSourceV2Options.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/DataSourceV2Options.java index 9a89c8193dd6e..ddc2acca693ac 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/DataSourceV2Options.java +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/DataSourceV2Options.java @@ -36,6 +36,10 @@ private String toLowerCase(String key) { return key.toLowerCase(Locale.ROOT); } + public static DataSourceV2Options empty() { + return new DataSourceV2Options(new HashMap<>()); + } + public DataSourceV2Options(Map<String, String> originalMap) { keyLowerCasedMap = new HashMap<>(originalMap.size()); for (Map.Entry<String, String> entry : originalMap.entrySet()) { @@ -43,10 +47,54 @@ public DataSourceV2Options(Map<String, String> originalMap) { } } + public Map<String, String> asMap() { + return new HashMap<>(keyLowerCasedMap); + } + /** * Returns the option value to which the specified key is mapped, case-insensitively. */ public Optional<String> get(String key) { return Optional.ofNullable(keyLowerCasedMap.get(toLowerCase(key))); } + + /** + * Returns the boolean value to which the specified key is mapped, + * or defaultValue if there is no mapping for the key. The key match is case-insensitive. + */ + public boolean getBoolean(String key, boolean defaultValue) { + String lcaseKey = toLowerCase(key); + return keyLowerCasedMap.containsKey(lcaseKey) ? + Boolean.parseBoolean(keyLowerCasedMap.get(lcaseKey)) : defaultValue; + } + + /** + * Returns the integer value to which the specified key is mapped, + * or defaultValue if there is no mapping for the key. The key match is case-insensitive. + */ + public int getInt(String key, int defaultValue) { + String lcaseKey = toLowerCase(key); + return keyLowerCasedMap.containsKey(lcaseKey) ? + Integer.parseInt(keyLowerCasedMap.get(lcaseKey)) : defaultValue; + } + + /** + * Returns the long value to which the specified key is mapped, + * or defaultValue if there is no mapping for the key. The key match is case-insensitive. + */ + public long getLong(String key, long defaultValue) { + String lcaseKey = toLowerCase(key); + return keyLowerCasedMap.containsKey(lcaseKey) ? + Long.parseLong(keyLowerCasedMap.get(lcaseKey)) : defaultValue; + } + + /** + * Returns the double value to which the specified key is mapped, + * or defaultValue if there is no mapping for the key. The key match is case-insensitive. + */ + public double getDouble(String key, double defaultValue) { + String lcaseKey = toLowerCase(key); + return keyLowerCasedMap.containsKey(lcaseKey) ? + Double.parseDouble(keyLowerCasedMap.get(lcaseKey)) : defaultValue; + } } diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SessionConfigSupport.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SessionConfigSupport.java new file mode 100644 index 0000000000000..3cb020d2e0836 --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/SessionConfigSupport.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.
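
As a brief aside, a minimal caller-side sketch of the typed getters added above; the option names are invented for illustration:

import java.util.HashMap;
import java.util.Map;
import org.apache.spark.sql.sources.v2.DataSourceV2Options;

public class OptionsExample {
  public static void main(String[] args) {
    Map<String, String> raw = new HashMap<>();
    raw.put("CachePages", "true");  // mixed-case key on purpose
    raw.put("pageSizeKB", "64");
    DataSourceV2Options options = new DataSourceV2Options(raw);

    // Lookups are case-insensitive because keys are lower-cased internally.
    boolean cache = options.getBoolean("cachepages", false);  // true
    int pageSize = options.getInt("PAGESIZEKB", 16);          // 64
    long timeoutMs = options.getLong("timeoutMs", 30000L);    // 30000: key absent, default used
    System.out.println(cache + ", " + pageSize + ", " + timeoutMs);
  }
}
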
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources.v2; + +import org.apache.spark.annotation.InterfaceStability; + +/** + * A mix-in interface for {@link DataSourceV2}. Data sources can implement this interface to + * propagate session configs with the specified key-prefix to all data source operations in this + * session. + */ +@InterfaceStability.Evolving +public interface SessionConfigSupport { + + /** + * Key prefix of the session configs to propagate. Spark will extract all session configs that + * start with `spark.datasource.$keyPrefix`, turn `spark.datasource.$keyPrefix.xxx -> yyy` + * into `xxx -> yyy`, and propagate them to all data source operations in this session. + */ + String keyPrefix(); } diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/ContinuousReadSupport.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/ContinuousReadSupport.java new file mode 100644 index 0000000000000..3136cee1f655f --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/ContinuousReadSupport.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources.v2.streaming; + +import java.util.Optional; + +import org.apache.spark.sql.sources.v2.DataSourceV2; +import org.apache.spark.sql.sources.v2.DataSourceV2Options; +import org.apache.spark.sql.sources.v2.streaming.reader.ContinuousReader; +import org.apache.spark.sql.types.StructType; + +/** + * A mix-in interface for {@link DataSourceV2}. Data sources can implement this interface to + * provide data reading ability for continuous stream processing. + */ +public interface ContinuousReadSupport extends DataSourceV2 { + /** + * Creates a {@link ContinuousReader} to scan the data from this data source. + * + * @param schema the user-provided schema, or empty() if none was provided + * @param checkpointLocation a path to Hadoop FS scratch space that can be used for failure + * recovery. Readers for the same logical source in the same query + * will be given the same checkpointLocation.
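
To make the propagation contract above concrete, a hedged sketch of a source opting in; the class name and config key are hypothetical:

import org.apache.spark.sql.sources.v2.DataSourceV2;
import org.apache.spark.sql.sources.v2.SessionConfigSupport;

// With keyPrefix() == "mysource", a session config such as
//   spark.datasource.mysource.fetchSize = 1000
// is delivered to every operation of this source as the option fetchSize=1000.
public class MySource implements DataSourceV2, SessionConfigSupport {
  @Override
  public String keyPrefix() {
    return "mysource";
  }
}
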
+ * @param options the options for the returned data source reader, which is an immutable + * case-insensitive string-to-string map. + */ + ContinuousReader createContinuousReader( + Optional<StructType> schema, + String checkpointLocation, + DataSourceV2Options options); } diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/ContinuousWriteSupport.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/ContinuousWriteSupport.java new file mode 100644 index 0000000000000..dee493cadb71e --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/ContinuousWriteSupport.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources.v2.streaming; + +import java.util.Optional; + +import org.apache.spark.annotation.InterfaceStability; +import org.apache.spark.sql.execution.streaming.BaseStreamingSink; +import org.apache.spark.sql.sources.v2.DataSourceV2; +import org.apache.spark.sql.sources.v2.DataSourceV2Options; +import org.apache.spark.sql.sources.v2.streaming.writer.ContinuousWriter; +import org.apache.spark.sql.sources.v2.writer.DataSourceV2Writer; +import org.apache.spark.sql.streaming.OutputMode; +import org.apache.spark.sql.types.StructType; + +/** + * A mix-in interface for {@link DataSourceV2}. Data sources can implement this interface to + * provide data writing ability for continuous stream processing. + */ +@InterfaceStability.Evolving +public interface ContinuousWriteSupport extends BaseStreamingSink { + + /** + * Creates an optional {@link ContinuousWriter} to save the data to this data source. Data + * sources can return None if no writing is needed. + * + * @param queryId A unique string for the writing query. It's possible that there are many + * writing queries running at the same time, and the returned + * {@link DataSourceV2Writer} can use this id to distinguish itself from others. + * @param schema the schema of the data to be written. + * @param mode the output mode which determines what successive epoch output means to this + * sink; please refer to {@link OutputMode} for more details. + * @param options the options for the returned data source writer, which is an immutable + * case-insensitive string-to-string map.
+ */ + Optional<ContinuousWriter> createContinuousWriter( + String queryId, + StructType schema, + OutputMode mode, + DataSourceV2Options options); } diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/MicroBatchReadSupport.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/MicroBatchReadSupport.java new file mode 100644 index 0000000000000..3c87a3db68243 --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/MicroBatchReadSupport.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources.v2.streaming; + +import java.util.Optional; + +import org.apache.spark.annotation.InterfaceStability; +import org.apache.spark.sql.sources.v2.DataSourceV2; +import org.apache.spark.sql.sources.v2.DataSourceV2Options; +import org.apache.spark.sql.sources.v2.streaming.reader.MicroBatchReader; +import org.apache.spark.sql.types.StructType; + +/** + * A mix-in interface for {@link DataSourceV2}. Data sources can implement this interface to + * provide streaming micro-batch data reading ability. + */ +@InterfaceStability.Evolving +public interface MicroBatchReadSupport extends DataSourceV2 { + /** + * Creates a {@link MicroBatchReader} to read batches of data from this data source in a + * streaming query. + * + * The execution engine will create a micro-batch reader at the start of a streaming query, + * alternate calls to setOffsetRange and createReadTasks for each batch to process, and then + * call stop() when the execution is complete. Note that a single query may have multiple + * executions due to restart or failure recovery. + * + * @param schema the user-provided schema, or empty() if none was provided + * @param checkpointLocation a path to Hadoop FS scratch space that can be used for failure + * recovery. Readers for the same logical source in the same query + * will be given the same checkpointLocation. + * @param options the options for the returned data source reader, which is an immutable + * case-insensitive string-to-string map. + */ + MicroBatchReader createMicroBatchReader( + Optional<StructType> schema, + String checkpointLocation, + DataSourceV2Options options); } diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/MicroBatchWriteSupport.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/MicroBatchWriteSupport.java new file mode 100644 index 0000000000000..53ffa95ae0f4c --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/MicroBatchWriteSupport.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.
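
For orientation, a skeletal sketch of a source wiring up micro-batch reads; MyMicroBatchReader is a hypothetical class implementing the MicroBatchReader interface shown later in this patch:

import java.util.Optional;
import org.apache.spark.sql.sources.v2.DataSourceV2Options;
import org.apache.spark.sql.sources.v2.streaming.MicroBatchReadSupport;
import org.apache.spark.sql.sources.v2.streaming.reader.MicroBatchReader;
import org.apache.spark.sql.types.StructType;

public class MyMicroBatchSource implements MicroBatchReadSupport {
  @Override
  public MicroBatchReader createMicroBatchReader(
      Optional<StructType> schema,
      String checkpointLocation,
      DataSourceV2Options options) {
    // A real source would honor a user-provided schema if present and use
    // checkpointLocation to persist whatever it needs for failure recovery.
    return new MyMicroBatchReader(schema, checkpointLocation, options); // hypothetical reader
  }
}
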
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources.v2.streaming; + +import java.util.Optional; + +import org.apache.spark.annotation.InterfaceStability; +import org.apache.spark.sql.execution.streaming.BaseStreamingSink; +import org.apache.spark.sql.sources.v2.DataSourceV2; +import org.apache.spark.sql.sources.v2.DataSourceV2Options; +import org.apache.spark.sql.sources.v2.writer.DataSourceV2Writer; +import org.apache.spark.sql.streaming.OutputMode; +import org.apache.spark.sql.types.StructType; + +/** + * A mix-in interface for {@link DataSourceV2}. Data sources can implement this interface to + * provide data writing ability and save the data from a micro-batch to the data source. + */ +@InterfaceStability.Evolving +public interface MicroBatchWriteSupport extends BaseStreamingSink { + + /** + * Creates an optional {@link DataSourceV2Writer} to save the data to this data source. Data + * sources can return None if no writing is needed. + * + * @param queryId A unique string for the writing query. It's possible that there are many writing + * queries running at the same time, and the returned {@link DataSourceV2Writer} + * can use this id to distinguish itself from others. + * @param epochId The unique numeric ID of the batch within this writing query. This is an + * incrementing counter representing a consistent set of data; the same batch may + * be started multiple times in failure recovery scenarios, but it will always + * contain the same records. + * @param schema the schema of the data to be written. + * @param mode the output mode which determines what successive batch output means to this + * sink; please refer to {@link OutputMode} for more details. + * @param options the options for the returned data source writer, which is an immutable + * case-insensitive string-to-string map. + */ + Optional<DataSourceV2Writer> createMicroBatchWriter( + String queryId, + long epochId, + StructType schema, + OutputMode mode, + DataSourceV2Options options); } diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/reader/ContinuousDataReader.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/reader/ContinuousDataReader.java new file mode 100644 index 0000000000000..ca9a290e97a02 --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/reader/ContinuousDataReader.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
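
Since the same (queryId, epochId) pair can be replayed with identical records after a failure, one plausible pattern, sketched here with invented helper names, is to skip epochs the sink has already committed:

// Inside a hypothetical MicroBatchWriteSupport implementation:
@Override
public Optional<DataSourceV2Writer> createMicroBatchWriter(
    String queryId, long epochId, StructType schema,
    OutputMode mode, DataSourceV2Options options) {
  // alreadyCommitted() is an invented lookup into the sink's own commit log.
  if (alreadyCommitted(queryId, epochId)) {
    return Optional.empty(); // nothing left to write for this replayed batch
  }
  return Optional.of(new MyEpochWriter(queryId, epochId, schema)); // invented writer
}
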
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources.v2.streaming.reader; + +import org.apache.spark.sql.sources.v2.reader.DataReader; + +/** + * A variation on {@link DataReader} for use with streaming in continuous processing mode. + */ +public interface ContinuousDataReader<T> extends DataReader<T> { + /** + * Get the offset of the current record, or the start offset if no records have been read. + * + * The execution engine will call this method along with get() to keep track of the current + * offset. When an epoch ends, the offset of the previous record in each partition will be saved + * as a restart checkpoint. + */ + PartitionOffset getOffset(); } diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/reader/ContinuousReader.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/reader/ContinuousReader.java new file mode 100644 index 0000000000000..f0b205869ed6c --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/reader/ContinuousReader.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources.v2.streaming.reader; + +import org.apache.spark.sql.execution.streaming.BaseStreamingSource; +import org.apache.spark.sql.sources.v2.reader.DataSourceV2Reader; + +import java.util.Optional; + +/** + * A mix-in interface for {@link DataSourceV2Reader}. Data source readers can implement this + * interface to allow reading data streams in continuous processing mode. + * + * Implementations must ensure each read task output is a {@link ContinuousDataReader}. + */ +public interface ContinuousReader extends BaseStreamingSource, DataSourceV2Reader { + /** + * Merge offsets coming from {@link ContinuousDataReader} instances in each partition to + * a single global offset. + */ + Offset mergeOffsets(PartitionOffset[] offsets); + + /** + * Deserialize a JSON string into an Offset of the implementation-defined offset type. + * @throws IllegalArgumentException if the JSON does not encode a valid offset for this reader + */ + Offset deserializeOffset(String json); + + /** + * Set the desired start offset for read tasks created from this reader. The scan will start + * from the first record after the provided offset, or from an implementation-defined inferred + * starting point if no offset is provided.
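
A toy sketch of the per-record bookkeeping getOffset() implies; DataReader's next()/get()/close() are assumed from the existing v2 reader API, and the offset class is invented:

import java.io.IOException;
import org.apache.spark.sql.sources.v2.streaming.reader.ContinuousDataReader;
import org.apache.spark.sql.sources.v2.streaming.reader.PartitionOffset;

class LongPartitionOffset implements PartitionOffset {
  final long position;
  LongPartitionOffset(long position) { this.position = position; }
}

class CountingDataReader implements ContinuousDataReader<Long> {
  private long position = 0; // start offset for this partition

  @Override
  public boolean next() throws IOException {
    position++;   // advance to the record we are about to return
    return true;  // an unbounded toy stream
  }

  @Override
  public Long get() { return position; }

  @Override
  public PartitionOffset getOffset() {
    // Offset of the current record; saved at epoch boundaries as a
    // restart checkpoint.
    return new LongPartitionOffset(position);
  }

  @Override
  public void close() {}
}
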
+ */ + void setOffset(Optional<Offset> start); + + /** + * Return the specified or inferred start offset for this reader. + * + * @throws IllegalStateException if setOffset has not been called + */ + Offset getStartOffset(); + + /** + * The execution engine will call this method in every epoch to determine if new read tasks need + * to be generated, which may be required if, for example, the underlying source system has had + * partitions added or removed. + * + * If true, the query will be shut down and restarted with a new reader. + */ + default boolean needsReconfiguration() { + return false; + } + + /** + * Informs the source that Spark has completed processing all data for offsets less than or + * equal to `end` and will only request offsets greater than `end` in the future. + */ + void commit(Offset end); } diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/reader/MicroBatchReader.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/reader/MicroBatchReader.java new file mode 100644 index 0000000000000..70ff756806032 --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/reader/MicroBatchReader.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources.v2.streaming.reader; + +import org.apache.spark.sql.sources.v2.reader.DataSourceV2Reader; +import org.apache.spark.sql.execution.streaming.BaseStreamingSource; + +import java.util.Optional; + +/** + * A mix-in interface for {@link DataSourceV2Reader}. Data source readers can implement this + * interface to indicate they allow micro-batch streaming reads. + */ +public interface MicroBatchReader extends DataSourceV2Reader, BaseStreamingSource { + /** + * Set the desired offset range for read tasks created from this reader. Read tasks will + * generate only data within (`start`, `end`]; that is, from the first record after `start` to + * the record with offset `end`. + * + * @param start The initial offset to scan from. If not specified, scan from an + * implementation-specified start point, such as the earliest available record. + * @param end The last offset to include in the scan. If not specified, scan up to an + * implementation-defined endpoint, such as the last available offset + * or the start offset plus a target batch size. + */ + void setOffsetRange(Optional<Offset> start, Optional<Offset> end); + + /** + * Returns the specified (if explicitly set through setOffsetRange) or inferred start offset + * for this reader. + * + * @throws IllegalStateException if setOffsetRange has not been called + */ + Offset getStartOffset(); + + /** + * Returns the specified (if explicitly set through setOffsetRange) or inferred end offset + * for this reader.
+ * + * @throws IllegalStateException if setOffsetRange has not been called + */ + Offset getEndOffset(); + + /** + * Deserialize a JSON string into an Offset of the implementation-defined offset type. + * @throws IllegalArgumentException if the JSON does not encode a valid offset for this reader + */ + Offset deserializeOffset(String json); + + /** + * Informs the source that Spark has completed processing all data for offsets less than or + * equal to `end` and will only request offsets greater than `end` in the future. + */ + void commit(Offset end); } diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/reader/Offset.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/reader/Offset.java new file mode 100644 index 0000000000000..60b87f2ac0756 --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/reader/Offset.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources.v2.streaming.reader; + +/** + * An abstract representation of progress through a {@link MicroBatchReader} or + * {@link ContinuousReader}. + * During execution, Offsets provided by the data source implementation will be logged and used as + * restart checkpoints. Sources should provide an Offset implementation which they can use to + * reconstruct the stream position where the offset was taken. + */ +public abstract class Offset extends org.apache.spark.sql.execution.streaming.Offset { + /** + * A JSON-serialized representation of an Offset that is + * used for saving offsets to the offset log. + * Note: We assume that equivalent/equal offsets serialize to + * identical JSON strings. + * + * @return JSON string encoding + */ + public abstract String json(); + + /** + * Equality based on JSON string representation. We leverage the + * JSON representation for normalization between an Offset's + * in-memory and on-disk representations.
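
Reading the micro-batch contract above end to end, the engine-side call sequence might look like the following sketch; shouldContinue() and runBatch() are stand-ins for query planning and execution:

// reader is a MicroBatchReader obtained from createMicroBatchReader(...).
Optional<Offset> start = Optional.empty();         // first batch: source infers the start
while (shouldContinue()) {
  reader.setOffsetRange(start, Optional.empty());  // let the source pick the end offset
  Offset end = reader.getEndOffset();
  runBatch(reader);                                // scan the rows in (start, end]
  reader.commit(end);                              // source may now discard data <= end
  start = Optional.of(end);                        // the next batch resumes after `end`
}
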
+ */ + @Override + public boolean equals(Object obj) { + if (obj instanceof org.apache.spark.sql.execution.streaming.Offset) { + return this.json() + .equals(((org.apache.spark.sql.execution.streaming.Offset) obj).json()); + } else { + return false; + } + } + + @Override + public int hashCode() { + return this.json().hashCode(); + } + + @Override + public String toString() { + return this.json(); + } +} diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/reader/PartitionOffset.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/reader/PartitionOffset.java new file mode 100644 index 0000000000000..eca0085c8a8ce --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/reader/PartitionOffset.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources.v2.streaming.reader; + +import java.io.Serializable; + +/** + * Used for per-partition offsets in continuous processing. ContinuousReader implementations will + * provide a method to merge these into a global Offset. + * + * These offsets must be serializable. + */ +public interface PartitionOffset extends Serializable { +} diff --git a/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/writer/ContinuousWriter.java b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/writer/ContinuousWriter.java new file mode 100644 index 0000000000000..723395bd1e963 --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/sources/v2/streaming/writer/ContinuousWriter.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
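
A minimal concrete Offset honoring the JSON contract above; the class is illustrative rather than part of the API:

import org.apache.spark.sql.sources.v2.streaming.reader.Offset;

public class LongOffset extends Offset {
  private final long value;

  public LongOffset(long value) {
    this.value = value;
  }

  @Override
  public String json() {
    // Equal offsets must serialize to identical strings, because the
    // equals()/hashCode() defined above compare this representation.
    return Long.toString(value);
  }
}
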
+ */ + +package org.apache.spark.sql.sources.v2.streaming.writer; + +import org.apache.spark.annotation.InterfaceStability; +import org.apache.spark.sql.sources.v2.writer.DataSourceV2Writer; +import org.apache.spark.sql.sources.v2.writer.DataWriter; +import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage; + +/** + * A {@link DataSourceV2Writer} for use with continuous stream processing. + */ +@InterfaceStability.Evolving +public interface ContinuousWriter extends DataSourceV2Writer { + /** + * Commits this writing job for the specified epoch with a list of commit messages. The commit + * messages are collected from successful data writers and are produced by + * {@link DataWriter#commit()}. + * + * If this method fails (by throwing an exception), this writing job is considered to have + * failed, and the execution engine will attempt to call {@link #abort(WriterCommitMessage[])}. + */ + void commit(long epochId, WriterCommitMessage[] messages); + + default void commit(WriterCommitMessage[] messages) { + throw new UnsupportedOperationException( + "Commit without epoch should not be called with ContinuousWriter"); + } } diff --git a/sql/core/src/main/java/org/apache/spark/sql/streaming/Trigger.java b/sql/core/src/main/java/org/apache/spark/sql/streaming/Trigger.java index d31790a285687..33ae9a9e87668 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/streaming/Trigger.java +++ b/sql/core/src/main/java/org/apache/spark/sql/streaming/Trigger.java @@ -22,6 +22,7 @@ import scala.concurrent.duration.Duration; import org.apache.spark.annotation.InterfaceStability; +import org.apache.spark.sql.execution.streaming.continuous.ContinuousTrigger; import org.apache.spark.sql.execution.streaming.OneTimeTrigger$; /** @@ -95,4 +96,57 @@ public static Trigger ProcessingTime(String interval) { public static Trigger Once() { return OneTimeTrigger$.MODULE$; } + + /** + * A trigger that continuously processes streaming data, asynchronously checkpointing at + * the specified interval. + * + * @since 2.3.0 + */ + public static Trigger Continuous(long intervalMs) { + return ContinuousTrigger.apply(intervalMs); + } + + /** + * A trigger that continuously processes streaming data, asynchronously checkpointing at + * the specified interval. + * + * {{{ + * import java.util.concurrent.TimeUnit + * df.writeStream.trigger(Trigger.Continuous(10, TimeUnit.SECONDS)) + * }}} + * + * @since 2.3.0 + */ + public static Trigger Continuous(long interval, TimeUnit timeUnit) { + return ContinuousTrigger.create(interval, timeUnit); + } + + /** + * (Scala-friendly) + * A trigger that continuously processes streaming data, asynchronously checkpointing at + * the specified interval. + * + * {{{ + * import scala.concurrent.duration._ + * df.writeStream.trigger(Trigger.Continuous(10.seconds)) + * }}} + * @since 2.3.0 + */ + public static Trigger Continuous(Duration interval) { + return ContinuousTrigger.apply(interval); + } + + /** + * A trigger that continuously processes streaming data, asynchronously checkpointing at + * the specified interval.
+ * + * {{{ + * df.writeStream.trigger(Trigger.Continuous("10 seconds")) + * }}} + * @since 2.3.0 + */ + public static Trigger Continuous(String interval) { + return ContinuousTrigger.apply(interval); + } } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ArrowColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java similarity index 69% rename from sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ArrowColumnVector.java rename to sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java index 5c502c9d91be4..708333213f3f1 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ArrowColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.sql.execution.vectorized; +package org.apache.spark.sql.vectorized; import org.apache.arrow.vector.*; import org.apache.arrow.vector.complex.*; @@ -34,11 +34,7 @@ public final class ArrowColumnVector extends ColumnVector { private ArrowColumnVector[] childColumns; private void ensureAccessible(int index) { - int valueCount = accessor.getValueCount(); - if (index < 0 || index >= valueCount) { - throw new IndexOutOfBoundsException( - String.format("index: %d, valueCount: %d", index, valueCount)); - } + ensureAccessible(index, 1); } private void ensureAccessible(int index, int count) { @@ -54,11 +50,6 @@ public int numNulls() { return accessor.getNullCount(); } - @Override - public boolean anyNullsSet() { - return numNulls() > 0; - } - @Override public void close() { if (childColumns != null) { @@ -69,20 +60,12 @@ public void close() { accessor.close(); } - // - // APIs dealing with nulls - // - @Override public boolean isNullAt(int rowId) { ensureAccessible(rowId); return accessor.isNullAt(rowId); } - // - // APIs dealing with Booleans - // - @Override public boolean getBoolean(int rowId) { ensureAccessible(rowId); @@ -99,10 +82,6 @@ public boolean[] getBooleans(int rowId, int count) { return array; } - // - // APIs dealing with Bytes - // - @Override public byte getByte(int rowId) { ensureAccessible(rowId); @@ -119,10 +98,6 @@ public byte[] getBytes(int rowId, int count) { return array; } - // - // APIs dealing with Shorts - // - @Override public short getShort(int rowId) { ensureAccessible(rowId); @@ -139,10 +114,6 @@ public short[] getShorts(int rowId, int count) { return array; } - // - // APIs dealing with Ints - // - @Override public int getInt(int rowId) { ensureAccessible(rowId); @@ -159,15 +130,6 @@ public int[] getInts(int rowId, int count) { return array; } - @Override - public int getDictId(int rowId) { - throw new UnsupportedOperationException(); - } - - // - // APIs dealing with Longs - // - @Override public long getLong(int rowId) { ensureAccessible(rowId); @@ -184,10 +146,6 @@ public long[] getLongs(int rowId, int count) { return array; } - // - // APIs dealing with floats - // - @Override public float getFloat(int rowId) { ensureAccessible(rowId); @@ -204,10 +162,6 @@ public float[] getFloats(int rowId, int count) { return array; } - // - // APIs dealing with doubles - // - @Override public double getDouble(int rowId) { ensureAccessible(rowId); @@ -224,10 +178,6 @@ public double[] getDoubles(int rowId, int count) { return array; } - // - // APIs dealing with Arrays - // - @Override public int getArrayLength(int rowId) { ensureAccessible(rowId); @@ -240,82 +190,63 @@ public int getArrayOffset(int 
rowId) { return accessor.getArrayOffset(rowId); } - // - // APIs dealing with Decimals - // - @Override public Decimal getDecimal(int rowId, int precision, int scale) { ensureAccessible(rowId); return accessor.getDecimal(rowId, precision, scale); } - // - // APIs dealing with UTF8Strings - // - @Override public UTF8String getUTF8String(int rowId) { ensureAccessible(rowId); return accessor.getUTF8String(rowId); } - // - // APIs dealing with Binaries - // - @Override public byte[] getBinary(int rowId) { ensureAccessible(rowId); return accessor.getBinary(rowId); } - /** - * Returns the data for the underlying array. - */ @Override public ArrowColumnVector arrayData() { return childColumns[0]; } - /** - * Returns the ordinal's child data column. - */ @Override public ArrowColumnVector getChildColumn(int ordinal) { return childColumns[ordinal]; } public ArrowColumnVector(ValueVector vector) { super(ArrowUtils.fromArrowField(vector.getField())); - if (vector instanceof NullableBitVector) { - accessor = new BooleanAccessor((NullableBitVector) vector); - } else if (vector instanceof NullableTinyIntVector) { - accessor = new ByteAccessor((NullableTinyIntVector) vector); - } else if (vector instanceof NullableSmallIntVector) { - accessor = new ShortAccessor((NullableSmallIntVector) vector); - } else if (vector instanceof NullableIntVector) { - accessor = new IntAccessor((NullableIntVector) vector); - } else if (vector instanceof NullableBigIntVector) { - accessor = new LongAccessor((NullableBigIntVector) vector); - } else if (vector instanceof NullableFloat4Vector) { - accessor = new FloatAccessor((NullableFloat4Vector) vector); - } else if (vector instanceof NullableFloat8Vector) { - accessor = new DoubleAccessor((NullableFloat8Vector) vector); - } else if (vector instanceof NullableDecimalVector) { - accessor = new DecimalAccessor((NullableDecimalVector) vector); - } else if (vector instanceof NullableVarCharVector) { - accessor = new StringAccessor((NullableVarCharVector) vector); - } else if (vector instanceof NullableVarBinaryVector) { - accessor = new BinaryAccessor((NullableVarBinaryVector) vector); - } else if (vector instanceof NullableDateDayVector) { - accessor = new DateAccessor((NullableDateDayVector) vector); - } else if (vector instanceof NullableTimeStampMicroTZVector) { - accessor = new TimestampAccessor((NullableTimeStampMicroTZVector) vector); + if (vector instanceof BitVector) { + accessor = new BooleanAccessor((BitVector) vector); + } else if (vector instanceof TinyIntVector) { + accessor = new ByteAccessor((TinyIntVector) vector); + } else if (vector instanceof SmallIntVector) { + accessor = new ShortAccessor((SmallIntVector) vector); + } else if (vector instanceof IntVector) { + accessor = new IntAccessor((IntVector) vector); + } else if (vector instanceof BigIntVector) { + accessor = new LongAccessor((BigIntVector) vector); + } else if (vector instanceof Float4Vector) { + accessor = new FloatAccessor((Float4Vector) vector); + } else if (vector instanceof Float8Vector) { + accessor = new DoubleAccessor((Float8Vector) vector); + } else if (vector instanceof DecimalVector) { + accessor = new DecimalAccessor((DecimalVector) vector); + } else if (vector instanceof VarCharVector) { + accessor = new StringAccessor((VarCharVector) vector); + } else if (vector instanceof VarBinaryVector) { + accessor = new BinaryAccessor((VarBinaryVector) vector); + } else if (vector instanceof DateDayVector) { + accessor = new DateAccessor((DateDayVector) vector); + } else if (vector instanceof 
TimeStampMicroTZVector) { + accessor = new TimestampAccessor((TimeStampMicroTZVector) vector); } else if (vector instanceof ListVector) { ListVector listVector = (ListVector) vector; accessor = new ArrayAccessor(listVector); childColumns = new ArrowColumnVector[1]; childColumns[0] = new ArrowColumnVector(listVector.getDataVector()); - resultArray = new ColumnarArray(childColumns[0]); } else if (vector instanceof MapVector) { MapVector mapVector = (MapVector) vector; accessor = new StructAccessor(mapVector); @@ -324,7 +255,6 @@ public ArrowColumnVector(ValueVector vector) { for (int i = 0; i < childColumns.length; ++i) { childColumns[i] = new ArrowColumnVector(mapVector.getVectorById(i)); } - resultStruct = new ColumnarRow(childColumns); } else { throw new UnsupportedOperationException(); } @@ -333,23 +263,22 @@ public ArrowColumnVector(ValueVector vector) { private abstract static class ArrowVectorAccessor { private final ValueVector vector; - private final ValueVector.Accessor nulls; ArrowVectorAccessor(ValueVector vector) { this.vector = vector; - this.nulls = vector.getAccessor(); } - final boolean isNullAt(int rowId) { - return nulls.isNull(rowId); + // TODO: should be final after removing ArrayAccessor workaround + boolean isNullAt(int rowId) { + return vector.isNull(rowId); } final int getValueCount() { - return nulls.getValueCount(); + return vector.getValueCount(); } final int getNullCount() { - return nulls.getNullCount(); + return vector.getNullCount(); } final void close() { @@ -407,11 +336,11 @@ int getArrayOffset(int rowId) { private static class BooleanAccessor extends ArrowVectorAccessor { - private final NullableBitVector.Accessor accessor; + private final BitVector accessor; - BooleanAccessor(NullableBitVector vector) { + BooleanAccessor(BitVector vector) { super(vector); - this.accessor = vector.getAccessor(); + this.accessor = vector; } @Override @@ -422,11 +351,11 @@ final boolean getBoolean(int rowId) { private static class ByteAccessor extends ArrowVectorAccessor { - private final NullableTinyIntVector.Accessor accessor; + private final TinyIntVector accessor; - ByteAccessor(NullableTinyIntVector vector) { + ByteAccessor(TinyIntVector vector) { super(vector); - this.accessor = vector.getAccessor(); + this.accessor = vector; } @Override @@ -437,11 +366,11 @@ final byte getByte(int rowId) { private static class ShortAccessor extends ArrowVectorAccessor { - private final NullableSmallIntVector.Accessor accessor; + private final SmallIntVector accessor; - ShortAccessor(NullableSmallIntVector vector) { + ShortAccessor(SmallIntVector vector) { super(vector); - this.accessor = vector.getAccessor(); + this.accessor = vector; } @Override @@ -452,11 +381,11 @@ final short getShort(int rowId) { private static class IntAccessor extends ArrowVectorAccessor { - private final NullableIntVector.Accessor accessor; + private final IntVector accessor; - IntAccessor(NullableIntVector vector) { + IntAccessor(IntVector vector) { super(vector); - this.accessor = vector.getAccessor(); + this.accessor = vector; } @Override @@ -467,11 +396,11 @@ final int getInt(int rowId) { private static class LongAccessor extends ArrowVectorAccessor { - private final NullableBigIntVector.Accessor accessor; + private final BigIntVector accessor; - LongAccessor(NullableBigIntVector vector) { + LongAccessor(BigIntVector vector) { super(vector); - this.accessor = vector.getAccessor(); + this.accessor = vector; } @Override @@ -482,11 +411,11 @@ final long getLong(int rowId) { private static class FloatAccessor 
extends ArrowVectorAccessor { - private final NullableFloat4Vector.Accessor accessor; + private final Float4Vector accessor; - FloatAccessor(NullableFloat4Vector vector) { + FloatAccessor(Float4Vector vector) { super(vector); - this.accessor = vector.getAccessor(); + this.accessor = vector; } @Override @@ -497,11 +426,11 @@ final float getFloat(int rowId) { private static class DoubleAccessor extends ArrowVectorAccessor { - private final NullableFloat8Vector.Accessor accessor; + private final Float8Vector accessor; - DoubleAccessor(NullableFloat8Vector vector) { + DoubleAccessor(Float8Vector vector) { super(vector); - this.accessor = vector.getAccessor(); + this.accessor = vector; } @Override @@ -512,11 +441,11 @@ final double getDouble(int rowId) { private static class DecimalAccessor extends ArrowVectorAccessor { - private final NullableDecimalVector.Accessor accessor; + private final DecimalVector accessor; - DecimalAccessor(NullableDecimalVector vector) { + DecimalAccessor(DecimalVector vector) { super(vector); - this.accessor = vector.getAccessor(); + this.accessor = vector; } @Override @@ -528,12 +457,12 @@ final Decimal getDecimal(int rowId, int precision, int scale) { private static class StringAccessor extends ArrowVectorAccessor { - private final NullableVarCharVector.Accessor accessor; + private final VarCharVector accessor; private final NullableVarCharHolder stringResult = new NullableVarCharHolder(); - StringAccessor(NullableVarCharVector vector) { + StringAccessor(VarCharVector vector) { super(vector); - this.accessor = vector.getAccessor(); + this.accessor = vector; } @Override @@ -551,11 +480,11 @@ final UTF8String getUTF8String(int rowId) { private static class BinaryAccessor extends ArrowVectorAccessor { - private final NullableVarBinaryVector.Accessor accessor; + private final VarBinaryVector accessor; - BinaryAccessor(NullableVarBinaryVector vector) { + BinaryAccessor(VarBinaryVector vector) { super(vector); - this.accessor = vector.getAccessor(); + this.accessor = vector; } @Override @@ -566,11 +495,11 @@ final byte[] getBinary(int rowId) { private static class DateAccessor extends ArrowVectorAccessor { - private final NullableDateDayVector.Accessor accessor; + private final DateDayVector accessor; - DateAccessor(NullableDateDayVector vector) { + DateAccessor(DateDayVector vector) { super(vector); - this.accessor = vector.getAccessor(); + this.accessor = vector; } @Override @@ -581,11 +510,11 @@ final int getInt(int rowId) { private static class TimestampAccessor extends ArrowVectorAccessor { - private final NullableTimeStampMicroTZVector.Accessor accessor; + private final TimeStampMicroTZVector accessor; - TimestampAccessor(NullableTimeStampMicroTZVector vector) { + TimestampAccessor(TimeStampMicroTZVector vector) { super(vector); - this.accessor = vector.getAccessor(); + this.accessor = vector; } @Override @@ -596,21 +525,31 @@ final long getLong(int rowId) { private static class ArrayAccessor extends ArrowVectorAccessor { - private final UInt4Vector.Accessor accessor; + private final ListVector accessor; ArrayAccessor(ListVector vector) { super(vector); - this.accessor = vector.getOffsetVector().getAccessor(); + this.accessor = vector; + } + + @Override + final boolean isNullAt(int rowId) { + // TODO: Workaround if vector has all non-null values, see ARROW-1948 + if (accessor.getValueCount() > 0 && accessor.getValidityBuffer().capacity() == 0) { + return false; + } else { + return super.isNullAt(rowId); + } } @Override final int getArrayLength(int rowId) { - return 
accessor.get(rowId + 1) - accessor.get(rowId); + return accessor.getInnerValueCountAt(rowId); } @Override final int getArrayOffset(int rowId) { - return accessor.get(rowId); + return accessor.getOffsetBuffer().getInt(rowId * accessor.OFFSET_WIDTH); } } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java similarity index 59% rename from sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java rename to sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java index 940457f2e3363..d1196e1299fee 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.sql.execution.vectorized; +package org.apache.spark.sql.vectorized; import org.apache.spark.sql.catalyst.util.MapData; import org.apache.spark.sql.types.DataType; @@ -22,32 +22,36 @@ import org.apache.spark.unsafe.types.UTF8String; /** - * This class represents a column of values and provides the main APIs to access the data - * values. It supports all the types and contains get APIs as well as their batched versions. - * The batched versions are preferable whenever possible. - * - * To handle nested schemas, ColumnVector has two types: Arrays and Structs. In both cases these - * columns have child columns. All of the data is stored in the child columns and the parent column - * contains nullability, and in the case of Arrays, the lengths and offsets into the child column. - * Lengths and offsets are encoded identically to INTs. - * Maps are just a special case of a two field struct. + * An interface representing in-memory columnar data in Spark. This interface defines the main APIs + * to access the data, as well as their batched versions. The batched versions are considered to be + * faster and preferable whenever possible. * * Most of the APIs take the rowId as a parameter. This is the batch local 0-based row id for values - * in the current RowBatch. + * in this ColumnVector. + * + * ColumnVector supports all the data types including nested types. To handle nested types, + * ColumnVector can have children and is a tree structure. For struct type, it stores the actual + * data of each field in the corresponding child ColumnVector, and only stores null information in + * the parent ColumnVector. For array type, it stores the actual array elements in the child + * ColumnVector, and stores null information, array offsets and lengths in the parent ColumnVector. * - * A ColumnVector should be considered immutable once originally created. + * ColumnVector is expected to be reused during the entire data loading process, to avoid allocating + * memory again and again. * - * ColumnVectors are intended to be reused. + * ColumnVector is meant to maximize CPU efficiency but not to minimize storage footprint. + * Implementations should prefer computing efficiency over storage efficiency when designing the + * format. Since it is expected to reuse the ColumnVector instance while loading data, the storage + * footprint is negligible. */ public abstract class ColumnVector implements AutoCloseable { + /** - * Returns the data type of this column. + * Returns the data type of this column vector.
*/ public final DataType dataType() { return type; } /** * Cleans up memory for this column. The column is not usable after this. - * TODO: this should probably have ref-counted semantics. */ public abstract void close(); @@ -56,12 +60,6 @@ public abstract class ColumnVector implements AutoCloseable { */ public abstract int numNulls(); - /** - * Returns true if any of the nulls indicator are set for this column. This can be used - * as an optimization to prevent setting nulls. - */ - public abstract boolean anyNullsSet(); - /** * Returns whether the value at rowId is NULL. */ @@ -107,13 +105,6 @@ public abstract class ColumnVector implements AutoCloseable { */ public abstract int[] getInts(int rowId, int count); - /** - * Returns the dictionary Id for rowId. - * This should only be called when the ColumnVector is dictionaryIds. - * We have this separate method for dictionaryIds as per SPARK-16928. - */ - public abstract int getDictId(int rowId); - /** * Returns the value for rowId. */ @@ -145,43 +136,39 @@ public abstract class ColumnVector implements AutoCloseable { public abstract double[] getDoubles(int rowId, int count); /** - * Returns the length of the array at rowid. + * Returns the length of the array for rowId. */ public abstract int getArrayLength(int rowId); /** - * Returns the offset of the array at rowid. + * Returns the offset of the array for rowId. */ public abstract int getArrayOffset(int rowId); /** - * Returns a utility object to get structs. + * Returns the struct for rowId. */ - public ColumnarRow getStruct(int rowId) { - resultStruct.rowId = rowId; - return resultStruct; + public final ColumnarRow getStruct(int rowId) { + return new ColumnarRow(this, rowId); } /** - * Returns a utility object to get structs. - * provided to keep API compatibility with InternalRow for code generation + * A special version of {@link #getStruct(int)}, which is only used as an adapter for the Spark + * codegen framework; the second parameter is totally ignored. */ - public ColumnarRow getStruct(int rowId, int size) { - resultStruct.rowId = rowId; - return resultStruct; + public final ColumnarRow getStruct(int rowId, int size) { + return getStruct(rowId); } /** - * Returns the array at rowid. + * Returns the array for rowId. */ public final ColumnarArray getArray(int rowId) { - resultArray.length = getArrayLength(rowId); - resultArray.offset = getArrayOffset(rowId); - return resultArray; + return new ColumnarArray(arrayData(), getArrayOffset(rowId), getArrayLength(rowId)); } /** - * Returns the value for rowId. + * Returns the map for rowId. */ public MapData getMap(int ordinal) { throw new UnsupportedOperationException(); @@ -213,50 +200,11 @@ public MapData getMap(int ordinal) { */ public abstract ColumnVector getChildColumn(int ordinal); - /** - * Returns true if this column is an array. - */ - public final boolean isArray() { return resultArray != null; } - /** * Data type for this column. */ protected DataType type; - /** - * Reusable Array holder for getArray(). - */ - protected ColumnarArray resultArray; - - /** - * Reusable Struct holder for getStruct(). - */ - protected ColumnarRow resultStruct; - - /** - * The Dictionary for this column. - * - * If it's not null, will be used to decode the value in getXXX(). - */ - protected Dictionary dictionary; - - /** - * Reusable column for ids of dictionary. - */ - protected ColumnVector dictionaryIds; - - /** - * Returns true if this column has a dictionary.
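
As a usage note for the accessors above, a short sketch of reading an array-typed column; vector and rowId are assumed to come from a populated batch:

// Offsets, lengths and nulls live in the parent vector; the elements
// themselves live in the child vector returned by arrayData().
ColumnarArray array = vector.getArray(rowId);
long sum = 0;
for (int i = 0; i < array.numElements(); i++) {
  if (!array.isNullAt(i)) {
    sum += array.getInt(i); // reads element i from the child vector
  }
}
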
- */ - public boolean hasDictionary() { return this.dictionary != null; } - - /** - * Returns the underlying integer column for ids of dictionary. - */ - public ColumnVector getDictionaryIds() { - return dictionaryIds; - } - /** * Sets up the common state and also handles creating the child columns if this is a nested * type. diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArray.java b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java similarity index 94% rename from sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArray.java rename to sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java index b9da641fc66c8..0d89a52e7a4fe 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarArray.java +++ b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.sql.execution.vectorized; +package org.apache.spark.sql.vectorized; import org.apache.spark.sql.catalyst.util.ArrayData; import org.apache.spark.sql.catalyst.util.MapData; @@ -23,18 +23,19 @@ import org.apache.spark.unsafe.types.UTF8String; /** - * Array abstraction in {@link ColumnVector}. The instance of this class is intended - * to be reused, callers should copy the data out if it needs to be stored. + * Array abstraction in {@link ColumnVector}. */ public final class ColumnarArray extends ArrayData { // The data for this array. This array contains elements from // data[offset] to data[offset + length). - public final ColumnVector data; - public int length; - public int offset; + private final ColumnVector data; + private final int offset; + private final int length; - ColumnarArray(ColumnVector data) { + public ColumnarArray(ColumnVector data, int offset, int length) { this.data = data; + this.offset = offset; + this.length = length; } @Override diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarBatch.java b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java similarity index 63% rename from sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarBatch.java rename to sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java index 2f5fb360b226f..9ae1c6d9993f0 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarBatch.java +++ b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java @@ -14,25 +14,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.sql.execution.vectorized; +package org.apache.spark.sql.vectorized; import java.util.*; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.execution.vectorized.MutableColumnarRow; import org.apache.spark.sql.types.StructType; /** - * This class is the in memory representation of rows as they are streamed through operators. It - * is designed to maximize CPU efficiency and not storage footprint. Since it is expected that - * each operator allocates one of these objects, the storage footprint on the task is negligible. - * - * The layout is a columnar with values encoded in their native format. Each RowBatch contains - * a horizontal partitioning of the data, split into columns. 
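As a reading aid for the relocated API, a minimal sketch of how a consumer walks the parent/child tree described in the new ColumnVector javadoc; the vector and its struct<id: int, tags: array<string>> schema are hypothetical, and only methods present in this diff are used:

{{{
// Hypothetical Scala sketch: null bits live in the parent vector, values in its children.
import org.apache.spark.sql.vectorized.ColumnVector

def readRow(vec: ColumnVector, rowId: Int): Unit = {
  if (!vec.isNullAt(rowId)) {
    val id = vec.getChildColumn(0).getInt(rowId)      // struct field data is in the child
    val tags = vec.getChildColumn(1).getArray(rowId)  // array offsets/lengths are in the parent
    val first = if (tags.numElements() > 0) tags.getUTF8String(0) else null
  }
}
}}}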
- * - * The ColumnarBatch supports either on heap or offheap modes with (mostly) the identical API. - * - * TODO: - * - There are many TODOs for the existing APIs. They should throw a not implemented exception. - * - Compaction: The batch and columns should be able to compact based on a selection vector. + * This class wraps multiple ColumnVectors as a row-wise table. It provides a row view of this + * batch so that Spark can access the data row by row. Instance of it is meant to be reused during + * the entire data loading process. */ public final class ColumnarBatch { public static final int DEFAULT_BATCH_SIZE = 4 * 1024; @@ -40,10 +33,10 @@ public final class ColumnarBatch { private final StructType schema; private final int capacity; private int numRows; - final ColumnVector[] columns; + private final ColumnVector[] columns; - // Staging row returned from getRow. - final ColumnarRow row; + // Staging row returned from `getRow`. + private final MutableColumnarRow row; /** * Called to close all the columns in this batch. It is not valid to access the data after @@ -56,12 +49,12 @@ public void close() { } /** - * Returns an iterator over the rows in this batch. This skips rows that are filtered out. + * Returns an iterator over the rows in this batch. */ - public Iterator rowIterator() { + public Iterator rowIterator() { final int maxRows = numRows; - final ColumnarRow row = new ColumnarRow(columns); - return new Iterator() { + final MutableColumnarRow row = new MutableColumnarRow(columns); + return new Iterator() { int rowId = 0; @Override @@ -70,7 +63,7 @@ public boolean hasNext() { } @Override - public ColumnarRow next() { + public InternalRow next() { if (rowId >= maxRows) { throw new NoSuchElementException(); } @@ -86,19 +79,7 @@ public void remove() { } /** - * Resets the batch for writing. - */ - public void reset() { - for (int i = 0; i < numCols(); ++i) { - if (columns[i] instanceof WritableColumnVector) { - ((WritableColumnVector) columns[i]).reset(); - } - } - this.numRows = 0; - } - - /** - * Sets the number of rows that are valid. + * Sets the number of rows in this batch. */ public void setNumRows(int numRows) { assert(numRows <= this.capacity); @@ -133,9 +114,8 @@ public void setNumRows(int numRows) { /** * Returns the row in this batch at `rowId`. Returned row is reused across calls. */ - public ColumnarRow getRow(int rowId) { - assert(rowId >= 0); - assert(rowId < numRows); + public InternalRow getRow(int rowId) { + assert(rowId >= 0 && rowId < numRows); row.rowId = rowId; return row; } @@ -144,6 +124,6 @@ public ColumnarBatch(StructType schema, ColumnVector[] columns, int capacity) { this.schema = schema; this.columns = columns; this.capacity = capacity; - this.row = new ColumnarRow(columns); + this.row = new MutableColumnarRow(columns); } } diff --git a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java new file mode 100644 index 0000000000000..3c6656dec77cd --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java @@ -0,0 +1,205 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.vectorized; + +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; +import org.apache.spark.sql.catalyst.util.MapData; +import org.apache.spark.sql.types.*; +import org.apache.spark.unsafe.types.CalendarInterval; +import org.apache.spark.unsafe.types.UTF8String; + +/** + * Row abstraction in {@link ColumnVector}. + */ +public final class ColumnarRow extends InternalRow { + // The data for this row. + // E.g. the value of 3rd int field is `data.getChildColumn(3).getInt(rowId)`. + private final ColumnVector data; + private final int rowId; + private final int numFields; + + public ColumnarRow(ColumnVector data, int rowId) { + assert (data.dataType() instanceof StructType); + this.data = data; + this.rowId = rowId; + this.numFields = ((StructType) data.dataType()).size(); + } + + @Override + public int numFields() { return numFields; } + + /** + * Revisit this. This is expensive. This is currently only used in test paths. + */ + @Override + public InternalRow copy() { + GenericInternalRow row = new GenericInternalRow(numFields); + for (int i = 0; i < numFields(); i++) { + if (isNullAt(i)) { + row.setNullAt(i); + } else { + DataType dt = data.getChildColumn(i).dataType(); + if (dt instanceof BooleanType) { + row.setBoolean(i, getBoolean(i)); + } else if (dt instanceof ByteType) { + row.setByte(i, getByte(i)); + } else if (dt instanceof ShortType) { + row.setShort(i, getShort(i)); + } else if (dt instanceof IntegerType) { + row.setInt(i, getInt(i)); + } else if (dt instanceof LongType) { + row.setLong(i, getLong(i)); + } else if (dt instanceof FloatType) { + row.setFloat(i, getFloat(i)); + } else if (dt instanceof DoubleType) { + row.setDouble(i, getDouble(i)); + } else if (dt instanceof StringType) { + row.update(i, getUTF8String(i).copy()); + } else if (dt instanceof BinaryType) { + row.update(i, getBinary(i)); + } else if (dt instanceof DecimalType) { + DecimalType t = (DecimalType)dt; + row.setDecimal(i, getDecimal(i, t.precision(), t.scale()), t.precision()); + } else if (dt instanceof DateType) { + row.setInt(i, getInt(i)); + } else if (dt instanceof TimestampType) { + row.setLong(i, getLong(i)); + } else { + throw new RuntimeException("Not implemented. 
" + dt); + } + } + } + return row; + } + + @Override + public boolean anyNull() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isNullAt(int ordinal) { return data.getChildColumn(ordinal).isNullAt(rowId); } + + @Override + public boolean getBoolean(int ordinal) { return data.getChildColumn(ordinal).getBoolean(rowId); } + + @Override + public byte getByte(int ordinal) { return data.getChildColumn(ordinal).getByte(rowId); } + + @Override + public short getShort(int ordinal) { return data.getChildColumn(ordinal).getShort(rowId); } + + @Override + public int getInt(int ordinal) { return data.getChildColumn(ordinal).getInt(rowId); } + + @Override + public long getLong(int ordinal) { return data.getChildColumn(ordinal).getLong(rowId); } + + @Override + public float getFloat(int ordinal) { return data.getChildColumn(ordinal).getFloat(rowId); } + + @Override + public double getDouble(int ordinal) { return data.getChildColumn(ordinal).getDouble(rowId); } + + @Override + public Decimal getDecimal(int ordinal, int precision, int scale) { + if (data.getChildColumn(ordinal).isNullAt(rowId)) return null; + return data.getChildColumn(ordinal).getDecimal(rowId, precision, scale); + } + + @Override + public UTF8String getUTF8String(int ordinal) { + if (data.getChildColumn(ordinal).isNullAt(rowId)) return null; + return data.getChildColumn(ordinal).getUTF8String(rowId); + } + + @Override + public byte[] getBinary(int ordinal) { + if (data.getChildColumn(ordinal).isNullAt(rowId)) return null; + return data.getChildColumn(ordinal).getBinary(rowId); + } + + @Override + public CalendarInterval getInterval(int ordinal) { + if (data.getChildColumn(ordinal).isNullAt(rowId)) return null; + final int months = data.getChildColumn(ordinal).getChildColumn(0).getInt(rowId); + final long microseconds = data.getChildColumn(ordinal).getChildColumn(1).getLong(rowId); + return new CalendarInterval(months, microseconds); + } + + @Override + public ColumnarRow getStruct(int ordinal, int numFields) { + if (data.getChildColumn(ordinal).isNullAt(rowId)) return null; + return data.getChildColumn(ordinal).getStruct(rowId); + } + + @Override + public ColumnarArray getArray(int ordinal) { + if (data.getChildColumn(ordinal).isNullAt(rowId)) return null; + return data.getChildColumn(ordinal).getArray(rowId); + } + + @Override + public MapData getMap(int ordinal) { + throw new UnsupportedOperationException(); + } + + @Override + public Object get(int ordinal, DataType dataType) { + if (dataType instanceof BooleanType) { + return getBoolean(ordinal); + } else if (dataType instanceof ByteType) { + return getByte(ordinal); + } else if (dataType instanceof ShortType) { + return getShort(ordinal); + } else if (dataType instanceof IntegerType) { + return getInt(ordinal); + } else if (dataType instanceof LongType) { + return getLong(ordinal); + } else if (dataType instanceof FloatType) { + return getFloat(ordinal); + } else if (dataType instanceof DoubleType) { + return getDouble(ordinal); + } else if (dataType instanceof StringType) { + return getUTF8String(ordinal); + } else if (dataType instanceof BinaryType) { + return getBinary(ordinal); + } else if (dataType instanceof DecimalType) { + DecimalType t = (DecimalType) dataType; + return getDecimal(ordinal, t.precision(), t.scale()); + } else if (dataType instanceof DateType) { + return getInt(ordinal); + } else if (dataType instanceof TimestampType) { + return getLong(ordinal); + } else if (dataType instanceof ArrayType) { + return getArray(ordinal); + } 
else if (dataType instanceof StructType) { + return getStruct(ordinal, ((StructType)dataType).fields().length); + } else if (dataType instanceof MapType) { + return getMap(ordinal); + } else { + throw new UnsupportedOperationException("Datatype not supported " + dataType); + } + } + + @Override + public void update(int ordinal, Object value) { throw new UnsupportedOperationException(); } + + @Override + public void setNullAt(int ordinal) { throw new UnsupportedOperationException(); } +} diff --git a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister index 0c5f3f22e31e8..0259c774bbf4a 100644 --- a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister +++ b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -1,8 +1,10 @@ org.apache.spark.sql.execution.datasources.csv.CSVFileFormat org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider org.apache.spark.sql.execution.datasources.json.JsonFileFormat +org.apache.spark.sql.execution.datasources.orc.OrcFileFormat org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat org.apache.spark.sql.execution.datasources.text.TextFileFormat org.apache.spark.sql.execution.streaming.ConsoleSinkProvider org.apache.spark.sql.execution.streaming.TextSocketSourceProvider org.apache.spark.sql.execution.streaming.RateSourceProvider +org.apache.spark.sql.execution.streaming.sources.RateSourceProviderV2 diff --git a/sql/core/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin b/sql/core/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin new file mode 100644 index 0000000000000..0bba2f88b92a5 --- /dev/null +++ b/sql/core/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin @@ -0,0 +1 @@ +org.apache.spark.sql.execution.ui.SQLHistoryServerPlugin diff --git a/sql/core/src/main/resources/META-INF/services/org.apache.spark.status.AppStatusPlugin b/sql/core/src/main/resources/META-INF/services/org.apache.spark.status.AppStatusPlugin deleted file mode 100644 index ac6d7f6962f85..0000000000000 --- a/sql/core/src/main/resources/META-INF/services/org.apache.spark.status.AppStatusPlugin +++ /dev/null @@ -1 +0,0 @@ -org.apache.spark.sql.execution.ui.SQLAppStatusPlugin diff --git a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.css b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.css index 594e747a8d3a5..b13850c301490 100644 --- a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.css +++ b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.css @@ -32,7 +32,7 @@ stroke-width: 1px; } -/* Hightlight the SparkPlan node name */ +/* Highlight the SparkPlan node name */ #plan-viz-graph svg text :first-child { font-weight: bold; } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index 17966eecfc051..e8d683a578f35 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -33,7 +33,8 @@ import org.apache.spark.sql.execution.datasources.csv._ import org.apache.spark.sql.execution.datasources.jdbc._ import 
org.apache.spark.sql.execution.datasources.json.TextInputJsonDataSource import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation -import org.apache.spark.sql.sources.v2.{DataSourceV2, DataSourceV2Options, ReadSupport, ReadSupportWithSchema} +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils +import org.apache.spark.sql.sources.v2._ import org.apache.spark.sql.types.{StringType, StructType} import org.apache.spark.unsafe.types.UTF8String @@ -182,11 +183,15 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { "read files of Hive data source directly.") } - val cls = DataSource.lookupDataSource(source) + val cls = DataSource.lookupDataSource(source, sparkSession.sessionState.conf) if (classOf[DataSourceV2].isAssignableFrom(cls)) { - val options = new DataSourceV2Options(extraOptions.asJava) + val ds = cls.newInstance() + val options = new DataSourceV2Options((extraOptions ++ + DataSourceV2Utils.extractSessionConfigs( + ds = ds.asInstanceOf[DataSourceV2], + conf = sparkSession.sessionState.conf)).asJava) - val reader = (cls.newInstance(), userSpecifiedSchema) match { + val reader = (ds, userSpecifiedSchema) match { case (ds: ReadSupportWithSchema, Some(schema)) => ds.createReader(schema, options) @@ -512,17 +517,20 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * * You can set the following CSV-specific options to deal with CSV files: *
- * • `sep` (default `,`): sets the single character as a separator for each
+ * • `sep` (default `,`): sets a single character as a separator for each
  * field and value.
  * • `encoding` (default `UTF-8`): decodes the CSV files by the given encoding
  * type.
- * • `quote` (default `"`): sets the single character used for escaping quoted values where
+ * • `quote` (default `"`): sets a single character used for escaping quoted values where
  * the separator can be part of the value. If you would like to turn off quotations, you need to
  * set not `null` but an empty string. This behaviour is different from
  * `com.databricks.spark.csv`.
- * • `escape` (default `\`): sets the single character used for escaping quotes inside
+ * • `escape` (default `\`): sets a single character used for escaping quotes inside
  * an already quoted value.
- * • `comment` (default empty string): sets the single character used for skipping lines
+ * • `charToEscapeQuoteEscaping` (default `escape` or `\0`): sets a single character used for
+ * escaping the escape for the quote character. The default value is escape character when escape
+ * and quote characters are different, `\0` otherwise.
+ * • `comment` (default empty string): sets a single character used for skipping lines
  * beginning with this character. By default, it is disabled.
  * • `header` (default `false`): uses the first line as names of columns.
  *
    • `inferSchema` (default `false`): infers the input schema automatically from data. It @@ -646,7 +654,14 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * Loads text files and returns a `DataFrame` whose schema starts with a string column named * "value", and followed by partitioned columns if there are any. * - * Each line in the text files is a new row in the resulting DataFrame. For example: + * You can set the following text-specific option(s) for reading text files: + *
        + *
+ * • `wholetext` (default `false`): If true, read a file as a single row and not split by "\n".
+ *
      + * By default, each line in the text files is a new row in the resulting DataFrame. + * + * Usage example: * {{{ * // Scala: * spark.read.text("/path/to/spark/README.md") @@ -678,7 +693,12 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * If the directory structure of the text files contains partitioning information, those are * ignored in the resulting Dataset. To include partitioning information as columns, use `text`. * - * Each line in the text files is a new element in the resulting Dataset. For example: + * You can set the following textFile-specific option(s) for reading text files: + *
        + *
+ * • `wholetext` (default `false`): If true, read a file as a single row and not split by "\n".
+ *
+ * By default, each line in the text files is a new element in the resulting Dataset. For example: * {{{ * // Scala: * spark.read.textFile("/path/to/spark/README.md") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index 35abeccfd514a..3304f368e1050 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -26,13 +26,14 @@ import org.apache.spark.annotation.InterfaceStability import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, UnresolvedRelation} import org.apache.spark.sql.catalyst.catalog._ -import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{AnalysisBarrier, InsertIntoTable, LogicalPlan} import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource, LogicalRelation} +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils import org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2 import org.apache.spark.sql.sources.BaseRelation -import org.apache.spark.sql.sources.v2.{DataSourceV2, DataSourceV2Options, WriteSupport} +import org.apache.spark.sql.sources.v2._ import org.apache.spark.sql.types.StructType /** @@ -234,16 +235,20 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { assertNotBucketed("save") - val cls = DataSource.lookupDataSource(source) + val cls = DataSource.lookupDataSource(source, df.sparkSession.sessionState.conf) if (classOf[DataSourceV2].isAssignableFrom(cls)) { - cls.newInstance() match { - case ds: WriteSupport => - val options = new DataSourceV2Options(extraOptions.asJava) + val ds = cls.newInstance() + ds match { + case ws: WriteSupport => + val options = new DataSourceV2Options((extraOptions ++ + DataSourceV2Utils.extractSessionConfigs( + ds = ds.asInstanceOf[DataSourceV2], + conf = df.sparkSession.sessionState.conf)).asJava) // Using a timestamp and a random UUID to distinguish different writing jobs. This is good // enough as there won't be tons of writing jobs created at the same second. val jobId = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US) .format(new Date()) + "-" + UUID.randomUUID() - val writer = ds.createWriter(jobId, df.logicalPlan.schema, mode, options) + val writer = ws.createWriter(jobId, df.logicalPlan.schema, mode, options) if (writer.isPresent) { runCommand(df.sparkSession, "save") { WriteToDataSourceV2(writer.get(), df.logicalPlan) @@ -259,7 +264,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { sparkSession = df.sparkSession, className = source, partitionColumns = partitioningColumns.getOrElse(Nil), - options = extraOptions.toMap).planForWriting(mode, df.logicalPlan) + options = extraOptions.toMap).planForWriting(mode, AnalysisBarrier(df.logicalPlan)) } } } @@ -589,13 +594,16 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { * * You can set the following CSV-specific option(s) for writing CSV files: *
- * • `sep` (default `,`): sets the single character as a separator for each
+ * • `sep` (default `,`): sets a single character as a separator for each
  * field and value.
- * • `quote` (default `"`): sets the single character used for escaping quoted values where
+ * • `quote` (default `"`): sets a single character used for escaping quoted values where
  * the separator can be part of the value. If an empty string is set, it uses `u0000`
  * (null character).
- * • `escape` (default `\`): sets the single character used for escaping quotes inside
+ * • `escape` (default `\`): sets a single character used for escaping quotes inside
  * an already quoted value.
+ * • `charToEscapeQuoteEscaping` (default `escape` or `\0`): sets a single character used for
+ * escaping the escape for the quote character. The default value is escape character when escape
+ * and quote characters are different, `\0` otherwise.
  * • `escapeQuotes` (default `true`): a flag indicating whether values containing
  * quotes should always be enclosed in quotes. Default is to escape all values containing
  * a quote character.
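A hedged usage sketch of the writer options documented above (output path and separator are hypothetical; `charToEscapeQuoteEscaping` only needs to be set when the default, the escape character or `\0`, is not wanted):

{{{
df.write
  .option("sep", ";")
  .option("escape", "\\")
  .option("charToEscapeQuoteEscaping", "\\")
  .csv("/tmp/csv-out")
}}}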
      • diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 1620ab3aa2094..34f0ab5aa6699 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -191,6 +191,9 @@ class Dataset[T] private[sql]( } } + // Wraps analyzed logical plans with an analysis barrier so we won't traverse/resolve it again. + @transient private val planWithBarrier = AnalysisBarrier(logicalPlan) + /** * Currently [[ExpressionEncoder]] is the only implementation of [[Encoder]], here we turn the * passed in encoder to [[ExpressionEncoder]] explicitly, and mark it implicit so that we can use @@ -234,13 +237,20 @@ class Dataset[T] private[sql]( private[sql] def showString( _numRows: Int, truncate: Int = 20, vertical: Boolean = false): String = { val numRows = _numRows.max(0).min(Int.MaxValue - 1) - val takeResult = toDF().take(numRows + 1) + val newDf = toDF() + val castCols = newDf.logicalPlan.output.map { col => + // Since binary types in top-level schema fields have a specific format to print, + // so we do not cast them to strings here. + if (col.dataType == BinaryType) { + Column(col) + } else { + Column(col).cast(StringType) + } + } + val takeResult = newDf.select(castCols: _*).take(numRows + 1) val hasMoreData = takeResult.length > numRows val data = takeResult.take(numRows) - lazy val timeZone = - DateTimeUtils.getTimeZone(sparkSession.sessionState.conf.sessionLocalTimeZone) - // For array values, replace Seq and Array with square brackets // For cells that are beyond `truncate` characters, replace it with the // first `truncate-3` and "..." @@ -249,12 +259,6 @@ class Dataset[T] private[sql]( val str = cell match { case null => "null" case binary: Array[Byte] => binary.map("%02X".format(_)).mkString("[", " ", "]") - case array: Array[_] => array.mkString("[", ", ", "]") - case seq: Seq[_] => seq.mkString("[", ", ", "]") - case d: Date => - DateTimeUtils.dateToString(DateTimeUtils.fromJavaDate(d)) - case ts: Timestamp => - DateTimeUtils.timestampToString(DateTimeUtils.fromJavaTimestamp(ts), timeZone) case _ => cell.toString } if (truncate > 0 && str.length > truncate) { @@ -398,12 +402,16 @@ class Dataset[T] private[sql]( * If the schema of the Dataset does not match the desired `U` type, you can use `select` * along with `alias` or `as` to rearrange or rename as required. * + * Note that `as[]` only changes the view of the data that is passed into typed operations, + * such as `map()`, and does not eagerly project away any columns that are not present in + * the specified class. + * * @group basic * @since 1.6.0 */ @Experimental @InterfaceStability.Evolving - def as[U : Encoder]: Dataset[U] = Dataset[U](sparkSession, logicalPlan) + def as[U : Encoder]: Dataset[U] = Dataset[U](sparkSession, planWithBarrier) /** * Converts this strongly typed collection of data to generic `DataFrame` with columns renamed. @@ -524,7 +532,7 @@ class Dataset[T] private[sql]( */ @Experimental @InterfaceStability.Evolving - def checkpoint(): Dataset[T] = checkpoint(eager = true) + def checkpoint(): Dataset[T] = checkpoint(eager = true, reliableCheckpoint = true) /** * Returns a checkpointed version of this Dataset. 
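The Dataset.scala hunk below reworks checkpoint() and introduces localCheckpoint(); a usage sketch under hypothetical names (`iterativeResult` is any Dataset, and the checkpoint directory only matters for the reliable variant):

{{{
spark.sparkContext.setCheckpointDir("/tmp/checkpoints")  // required for reliable checkpoints
val reliable = iterativeResult.checkpoint()    // plan truncated; data written to the checkpoint dir
val local = iterativeResult.localCheckpoint()  // executor storage: faster but not fault-tolerant
}}}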
Checkpointing can be used to truncate the @@ -537,9 +545,52 @@ class Dataset[T] private[sql]( */ @Experimental @InterfaceStability.Evolving - def checkpoint(eager: Boolean): Dataset[T] = { + def checkpoint(eager: Boolean): Dataset[T] = checkpoint(eager = eager, reliableCheckpoint = true) + + /** + * Eagerly locally checkpoints a Dataset and returns the new Dataset. Checkpointing can be + * used to truncate the logical plan of this Dataset, which is especially useful in iterative + * algorithms where the plan may grow exponentially. Local checkpoints are written to executor + * storage and, while potentially faster, they are unreliable and may compromise job completion. + * + * @group basic + * @since 2.3.0 + */ + @Experimental + @InterfaceStability.Evolving + def localCheckpoint(): Dataset[T] = checkpoint(eager = true, reliableCheckpoint = false) + + /** + * Locally checkpoints a Dataset and returns the new Dataset. Checkpointing can be used to truncate + * the logical plan of this Dataset, which is especially useful in iterative algorithms where the + * plan may grow exponentially. Local checkpoints are written to executor storage and, while + * potentially faster, they are unreliable and may compromise job completion. + * + * @group basic + * @since 2.3.0 + */ + @Experimental + @InterfaceStability.Evolving + def localCheckpoint(eager: Boolean): Dataset[T] = checkpoint( + eager = eager, + reliableCheckpoint = false + ) + + /** + * Returns a checkpointed version of this Dataset. + * + * @param eager Whether to checkpoint this Dataset immediately + * @param reliableCheckpoint Whether to create a reliable checkpoint saved to files inside the + * checkpoint directory. If false, creates a local checkpoint using + * the caching subsystem + */ + private def checkpoint(eager: Boolean, reliableCheckpoint: Boolean): Dataset[T] = { val internalRdd = queryExecution.toRdd.map(_.copy()) - internalRdd.checkpoint() + if (reliableCheckpoint) { + internalRdd.checkpoint() + } else { + internalRdd.localCheckpoint() + } if (eager) { internalRdd.count() @@ -604,7 +655,7 @@ class Dataset[T] private[sql]( require(parsedDelay.milliseconds >= 0 && parsedDelay.months >= 0, s"delay threshold ($delayThreshold) should not be negative.") EliminateEventTimeWatermark( - EventTimeWatermark(UnresolvedAttribute(eventTime), parsedDelay, logicalPlan)) + EventTimeWatermark(UnresolvedAttribute(eventTime), parsedDelay, planWithBarrier)) } /** @@ -777,7 +828,7 @@ class Dataset[T] private[sql]( * @since 2.0.0 */ def join(right: Dataset[_]): DataFrame = withPlan { - Join(logicalPlan, right.logicalPlan, joinType = Inner, None) + Join(planWithBarrier, right.planWithBarrier, joinType = Inner, None) } /** @@ -855,7 +906,7 @@ class Dataset[T] private[sql]( // Analyze the self join. The assumption is that the analyzer will disambiguate left vs right // by creating a new instance for one of the branches. val joined = sparkSession.sessionState.executePlan( - Join(logicalPlan, right.logicalPlan, joinType = JoinType(joinType), None)) + Join(planWithBarrier, right.planWithBarrier, joinType = JoinType(joinType), None)) .analyzed.asInstanceOf[Join] withPlan { @@ -916,7 +967,7 @@ class Dataset[T] private[sql]( // Trigger analysis so in the case of self-join, the analyzer will clone the plan.
val plan = withPlan( - Join(logicalPlan, right.logicalPlan, JoinType(joinType), Some(joinExprs.expr))) + Join(planWithBarrier, right.planWithBarrier, JoinType(joinType), Some(joinExprs.expr))) .queryExecution.analyzed.asInstanceOf[Join] // If auto self join alias is disabled, return the plan. @@ -925,8 +976,8 @@ class Dataset[T] private[sql]( } // If left/right have no output set intersection, return the plan. - val lanalyzed = withPlan(this.logicalPlan).queryExecution.analyzed - val ranalyzed = withPlan(right.logicalPlan).queryExecution.analyzed + val lanalyzed = withPlan(this.planWithBarrier).queryExecution.analyzed + val ranalyzed = withPlan(right.planWithBarrier).queryExecution.analyzed if (lanalyzed.outputSet.intersect(ranalyzed.outputSet).isEmpty) { return withPlan(plan) } @@ -958,7 +1009,7 @@ class Dataset[T] private[sql]( * @since 2.1.0 */ def crossJoin(right: Dataset[_]): DataFrame = withPlan { - Join(logicalPlan, right.logicalPlan, joinType = Cross, None) + Join(planWithBarrier, right.planWithBarrier, joinType = Cross, None) } /** @@ -990,8 +1041,8 @@ class Dataset[T] private[sql]( // etc. val joined = sparkSession.sessionState.executePlan( Join( - this.logicalPlan, - other.logicalPlan, + this.planWithBarrier, + other.planWithBarrier, JoinType(joinType), Some(condition.expr))).analyzed.asInstanceOf[Join] @@ -1212,7 +1263,7 @@ class Dataset[T] private[sql]( * @since 1.6.0 */ def as(alias: String): Dataset[T] = withTypedPlan { - SubqueryAlias(alias, logicalPlan) + SubqueryAlias(alias, planWithBarrier) } /** @@ -1250,7 +1301,7 @@ class Dataset[T] private[sql]( */ @scala.annotation.varargs def select(cols: Column*): DataFrame = withPlan { - Project(cols.map(_.named), logicalPlan) + Project(cols.map(_.named), planWithBarrier) } /** @@ -1305,8 +1356,8 @@ class Dataset[T] private[sql]( @InterfaceStability.Evolving def select[U1](c1: TypedColumn[T, U1]): Dataset[U1] = { implicit val encoder = c1.encoder - val project = Project(c1.withInputType(exprEnc, logicalPlan.output).named :: Nil, - logicalPlan) + val project = Project(c1.withInputType(exprEnc, planWithBarrier.output).named :: Nil, + planWithBarrier) if (encoder.flat) { new Dataset[U1](sparkSession, project, encoder) @@ -1324,8 +1375,8 @@ class Dataset[T] private[sql]( protected def selectUntyped(columns: TypedColumn[_, _]*): Dataset[_] = { val encoders = columns.map(_.encoder) val namedColumns = - columns.map(_.withInputType(exprEnc, logicalPlan.output).named) - val execution = new QueryExecution(sparkSession, Project(namedColumns, logicalPlan)) + columns.map(_.withInputType(exprEnc, planWithBarrier.output).named) + val execution = new QueryExecution(sparkSession, Project(namedColumns, planWithBarrier)) new Dataset(sparkSession, execution, ExpressionEncoder.tuple(encoders)) } @@ -1401,7 +1452,7 @@ class Dataset[T] private[sql]( * @since 1.6.0 */ def filter(condition: Column): Dataset[T] = withTypedPlan { - Filter(condition.expr, logicalPlan) + Filter(condition.expr, planWithBarrier) } /** @@ -1578,7 +1629,7 @@ class Dataset[T] private[sql]( @Experimental @InterfaceStability.Evolving def groupByKey[K: Encoder](func: T => K): KeyValueGroupedDataset[K, T] = { - val inputPlan = logicalPlan + val inputPlan = planWithBarrier val withGroupingKey = AppendColumns(func, inputPlan) val executed = sparkSession.sessionState.executePlan(withGroupingKey) @@ -1724,7 +1775,7 @@ class Dataset[T] private[sql]( * @since 2.0.0 */ def limit(n: Int): Dataset[T] = withTypedPlan { - Limit(Literal(n), logicalPlan) + Limit(Literal(n), planWithBarrier) } /** 
@@ -1774,7 +1825,7 @@ class Dataset[T] private[sql]( def union(other: Dataset[T]): Dataset[T] = withSetOperator { // This breaks caching, but it's usually ok because it addresses a very specific use case: // using union to union many files or partitions. - CombineUnions(Union(logicalPlan, other.logicalPlan)) + CombineUnions(Union(logicalPlan, other.logicalPlan)).mapChildren(AnalysisBarrier) } /** @@ -1833,7 +1884,7 @@ class Dataset[T] private[sql]( // This breaks caching, but it's usually ok because it addresses a very specific use case: // using union to union many files or partitions. - CombineUnions(Union(logicalPlan, rightChild)) + CombineUnions(Union(logicalPlan, rightChild)).mapChildren(AnalysisBarrier) } /** @@ -1847,7 +1898,7 @@ class Dataset[T] private[sql]( * @since 1.6.0 */ def intersect(other: Dataset[T]): Dataset[T] = withSetOperator { - Intersect(logicalPlan, other.logicalPlan) + Intersect(planWithBarrier, other.planWithBarrier) } /** @@ -1861,7 +1912,7 @@ class Dataset[T] private[sql]( * @since 2.0.0 */ def except(other: Dataset[T]): Dataset[T] = withSetOperator { - Except(logicalPlan, other.logicalPlan) + Except(planWithBarrier, other.planWithBarrier) } /** @@ -1912,7 +1963,7 @@ class Dataset[T] private[sql]( */ def sample(withReplacement: Boolean, fraction: Double, seed: Long): Dataset[T] = { withTypedPlan { - Sample(0.0, fraction, withReplacement, seed, logicalPlan) + Sample(0.0, fraction, withReplacement, seed, planWithBarrier) } } @@ -1954,15 +2005,15 @@ class Dataset[T] private[sql]( // overlapping splits. To prevent this, we explicitly sort each input partition to make the // ordering deterministic. Note that MapTypes cannot be sorted and are explicitly pruned out // from the sort order. - val sortOrder = logicalPlan.output + val sortOrder = planWithBarrier.output .filter(attr => RowOrdering.isOrderable(attr.dataType)) .map(SortOrder(_, Ascending)) val plan = if (sortOrder.nonEmpty) { - Sort(sortOrder, global = false, logicalPlan) + Sort(sortOrder, global = false, planWithBarrier) } else { // SPARK-12662: If sort order is empty, we materialize the dataset to guarantee determinism cache() - logicalPlan + planWithBarrier } val sum = weights.sum val normalizedCumWeights = weights.map(_ / sum).scanLeft(0.0d)(_ + _) @@ -2045,8 +2096,8 @@ class Dataset[T] private[sql]( val generator = UserDefinedGenerator(elementSchema, rowFunction, input.map(_.expr)) withPlan { - Generate(generator, join = true, outer = false, - qualifier = None, generatorOutput = Nil, logicalPlan) + Generate(generator, unrequiredChildIndex = Nil, outer = false, + qualifier = None, generatorOutput = Nil, planWithBarrier) } } @@ -2086,8 +2137,8 @@ class Dataset[T] private[sql]( val generator = UserDefinedGenerator(elementSchema, rowFunction, apply(inputColumn).expr :: Nil) withPlan { - Generate(generator, join = true, outer = false, - qualifier = None, generatorOutput = Nil, logicalPlan) + Generate(generator, unrequiredChildIndex = Nil, outer = false, + qualifier = None, generatorOutput = Nil, planWithBarrier) } } @@ -2235,7 +2286,7 @@ class Dataset[T] private[sql]( u.name, sparkSession.sessionState.analyzer.resolver).getOrElse(u) case Column(expr: Expression) => expr } - val attrs = this.logicalPlan.output + val attrs = this.planWithBarrier.output val colsAfterDrop = attrs.filter { attr => attr != expression }.map(attr => Column(attr)) @@ -2283,7 +2334,7 @@ class Dataset[T] private[sql]( } cols } - Deduplicate(groupCols, logicalPlan) + Deduplicate(groupCols, planWithBarrier) } /** @@ -2465,7 +2516,7 @@ 
class Dataset[T] private[sql]( @Experimental @InterfaceStability.Evolving def filter(func: T => Boolean): Dataset[T] = { - withTypedPlan(TypedFilter(func, logicalPlan)) + withTypedPlan(TypedFilter(func, planWithBarrier)) } /** @@ -2479,7 +2530,7 @@ class Dataset[T] private[sql]( @Experimental @InterfaceStability.Evolving def filter(func: FilterFunction[T]): Dataset[T] = { - withTypedPlan(TypedFilter(func, logicalPlan)) + withTypedPlan(TypedFilter(func, planWithBarrier)) } /** @@ -2493,7 +2544,7 @@ class Dataset[T] private[sql]( @Experimental @InterfaceStability.Evolving def map[U : Encoder](func: T => U): Dataset[U] = withTypedPlan { - MapElements[T, U](func, logicalPlan) + MapElements[T, U](func, planWithBarrier) } /** @@ -2508,7 +2559,7 @@ class Dataset[T] private[sql]( @InterfaceStability.Evolving def map[U](func: MapFunction[T, U], encoder: Encoder[U]): Dataset[U] = { implicit val uEnc = encoder - withTypedPlan(MapElements[T, U](func, logicalPlan)) + withTypedPlan(MapElements[T, U](func, planWithBarrier)) } /** @@ -2524,7 +2575,7 @@ class Dataset[T] private[sql]( def mapPartitions[U : Encoder](func: Iterator[T] => Iterator[U]): Dataset[U] = { new Dataset[U]( sparkSession, - MapPartitions[T, U](func, logicalPlan), + MapPartitions[T, U](func, planWithBarrier), implicitly[Encoder[U]]) } @@ -2555,7 +2606,7 @@ class Dataset[T] private[sql]( val rowEncoder = encoder.asInstanceOf[ExpressionEncoder[Row]] Dataset.ofRows( sparkSession, - MapPartitionsInR(func, packageNames, broadcastVars, schema, rowEncoder, logicalPlan)) + MapPartitionsInR(func, packageNames, broadcastVars, schema, rowEncoder, planWithBarrier)) } /** @@ -2719,7 +2770,7 @@ class Dataset[T] private[sql]( * @since 1.6.0 */ def repartition(numPartitions: Int): Dataset[T] = withTypedPlan { - Repartition(numPartitions, shuffle = true, logicalPlan) + Repartition(numPartitions, shuffle = true, planWithBarrier) } /** @@ -2732,8 +2783,18 @@ class Dataset[T] private[sql]( * @since 2.0.0 */ @scala.annotation.varargs - def repartition(numPartitions: Int, partitionExprs: Column*): Dataset[T] = withTypedPlan { - RepartitionByExpression(partitionExprs.map(_.expr), logicalPlan, numPartitions) + def repartition(numPartitions: Int, partitionExprs: Column*): Dataset[T] = { + // The underlying `LogicalPlan` operator special-cases all-`SortOrder` arguments. + // However, we don't want to complicate the semantics of this API method. + // Instead, let's give users a friendly error message, pointing them to the new method. + val sortOrders = partitionExprs.filter(_.expr.isInstanceOf[SortOrder]) + if (sortOrders.nonEmpty) throw new IllegalArgumentException( + s"""Invalid partitionExprs specified: $sortOrders + |For range partitioning use repartitionByRange(...) instead. + """.stripMargin) + withTypedPlan { + RepartitionByExpression(partitionExprs.map(_.expr), planWithBarrier, numPartitions) + } } /** @@ -2747,9 +2808,46 @@ class Dataset[T] private[sql]( * @since 2.0.0 */ @scala.annotation.varargs - def repartition(partitionExprs: Column*): Dataset[T] = withTypedPlan { - RepartitionByExpression( - partitionExprs.map(_.expr), logicalPlan, sparkSession.sessionState.conf.numShufflePartitions) + def repartition(partitionExprs: Column*): Dataset[T] = { + repartition(sparkSession.sessionState.conf.numShufflePartitions, partitionExprs: _*) + } + + /** + * Returns a new Dataset partitioned by the given partitioning expressions into + * `numPartitions`. The resulting Dataset is range partitioned. + * + * At least one partition-by expression must be specified. 
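A usage sketch contrasting the two repartitioning APIs touched in this hunk (column names are hypothetical):

{{{
df.repartition(10, $"country")        // hash partitioning, as before
df.repartitionByRange(10, $"ts".asc)  // range partitioning; SortOrder expressions are allowed
df.repartition(10, $"ts".asc)         // now rejected with IllegalArgumentException
}}}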
+ * When no explicit sort order is specified, "ascending nulls first" is assumed. + * + * @group typedrel + * @since 2.3.0 + */ + @scala.annotation.varargs + def repartitionByRange(numPartitions: Int, partitionExprs: Column*): Dataset[T] = { + require(partitionExprs.nonEmpty, "At least one partition-by expression must be specified.") + val sortOrder: Seq[SortOrder] = partitionExprs.map(_.expr match { + case expr: SortOrder => expr + case expr: Expression => SortOrder(expr, Ascending) + }) + withTypedPlan { + RepartitionByExpression(sortOrder, planWithBarrier, numPartitions) + } + } + + /** + * Returns a new Dataset partitioned by the given partitioning expressions, using + * `spark.sql.shuffle.partitions` as number of partitions. + * The resulting Dataset is range partitioned. + * + * At least one partition-by expression must be specified. + * When no explicit sort order is specified, "ascending nulls first" is assumed. + * + * @group typedrel + * @since 2.3.0 + */ + @scala.annotation.varargs + def repartitionByRange(partitionExprs: Column*): Dataset[T] = { + repartitionByRange(sparkSession.sessionState.conf.numShufflePartitions, partitionExprs: _*) } /** @@ -2770,7 +2868,7 @@ class Dataset[T] private[sql]( * @since 1.6.0 */ def coalesce(numPartitions: Int): Dataset[T] = withTypedPlan { - Repartition(numPartitions, shuffle = false, logicalPlan) + Repartition(numPartitions, shuffle = false, planWithBarrier) } /** @@ -2853,7 +2951,7 @@ class Dataset[T] private[sql]( // Represents the `QueryExecution` used to produce the content of the Dataset as an `RDD`. @transient private lazy val rddQueryExecution: QueryExecution = { - val deserialized = CatalystSerde.deserialize[T](logicalPlan) + val deserialized = CatalystSerde.deserialize[T](planWithBarrier) sparkSession.sessionState.executePlan(deserialized) } @@ -2979,7 +3077,7 @@ class Dataset[T] private[sql]( comment = None, properties = Map.empty, originalText = None, - child = logicalPlan, + child = planWithBarrier, allowExisting = false, replace = replace, viewType = viewType) @@ -3179,7 +3277,7 @@ class Dataset[T] private[sql]( } } withTypedPlan { - Sort(sortOrder, global = global, logicalPlan) + Sort(sortOrder, global = global, planWithBarrier) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index 69054fa8a9648..11bfaa0a726a7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -742,7 +742,10 @@ class SparkSession private( private[sql] def applySchemaToPythonRDD( rdd: RDD[Array[Any]], schema: StructType): DataFrame = { - val rowRdd = rdd.map(r => python.EvaluatePython.fromJava(r, schema).asInstanceOf[InternalRow]) + val rowRdd = rdd.mapPartitions { iter => + val fromJava = python.EvaluatePython.makeFromJava(schema) + iter.map(r => fromJava(r).asInstanceOf[InternalRow]) + } internalCreateDataFrame(rowRdd, schema) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala index 3ff476147b8b7..f94baef39dfad 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala @@ -17,12 +17,13 @@ package org.apache.spark.sql -import java.lang.reflect.{ParameterizedType, Type} +import java.lang.reflect.ParameterizedType import scala.reflect.runtime.universe.TypeTag import 
scala.util.Try import org.apache.spark.annotation.InterfaceStability +import org.apache.spark.api.python.PythonEvalType import org.apache.spark.internal.Logging import org.apache.spark.sql.api.java._ import org.apache.spark.sql.catalyst.{JavaTypeInference, ScalaReflection} @@ -41,8 +42,6 @@ import org.apache.spark.util.Utils * spark.udf * }}} * - * @note The user-defined functions must be deterministic. - * * @since 1.3.0 */ @InterfaceStability.Stable @@ -58,6 +57,8 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends | pythonIncludes: ${udf.func.pythonIncludes} | pythonExec: ${udf.func.pythonExec} | dataType: ${udf.dataType} + | pythonEvalType: ${PythonEvalType.toString(udf.pythonEvalType)} + | udfDeterministic: ${udf.udfDeterministic} """.stripMargin) functionRegistry.createOrReplaceTempFunction(name, udf.builder) @@ -109,29 +110,29 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends /* register 0-22 were generated by this script - (0 to 22).map { x => + (0 to 22).foreach { x => val types = (1 to x).foldRight("RT")((i, s) => {s"A$i, $s"}) - val typeTags = (1 to x).map(i => s"A${i}: TypeTag").foldLeft("RT: TypeTag")(_ + ", " + _) + val typeTags = (1 to x).map(i => s"A$i: TypeTag").foldLeft("RT: TypeTag")(_ + ", " + _) val inputTypes = (1 to x).foldRight("Nil")((i, s) => {s"ScalaReflection.schemaFor[A$i].dataType :: $s"}) println(s""" - /** - * Registers a deterministic Scala closure of ${x} arguments as user-defined function (UDF). - * @tparam RT return type of UDF. - * @since 1.3.0 - */ - def register[$typeTags](name: String, func: Function$x[$types]): UserDefinedFunction = { - val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] - val inputTypes = Try($inputTypes).toOption - def builder(e: Seq[Expression]) = if (e.length == $x) { - ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil), Some(name), nullable, udfDeterministic = true) - } else { - throw new AnalysisException("Invalid number of arguments for function " + name + - ". Expected: $x; Found: " + e.length) - } - functionRegistry.createOrReplaceTempFunction(name, builder) - val udf = UserDefinedFunction(func, dataType, inputTypes).withName(name) - if (nullable) udf else udf.asNonNullable() - }""") + |/** + | * Registers a deterministic Scala closure of $x arguments as user-defined function (UDF). + | * @tparam RT return type of UDF. + | * @since 1.3.0 + | */ + |def register[$typeTags](name: String, func: Function$x[$types]): UserDefinedFunction = { + | val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT] + | val inputTypes = Try($inputTypes).toOption + | def builder(e: Seq[Expression]) = if (e.length == $x) { + | ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil), Some(name), nullable, udfDeterministic = true) + | } else { + | throw new AnalysisException("Invalid number of arguments for function " + name + + | ". Expected: $x; Found: " + e.length) + | } + | functionRegistry.createOrReplaceTempFunction(name, builder) + | val udf = UserDefinedFunction(func, dataType, inputTypes).withName(name) + | if (nullable) udf else udf.asNonNullable() + |}""".stripMargin) } (0 to 22).foreach { i => @@ -143,7 +144,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends val funcCall = if (i == 0) "() => func" else "func" println(s""" |/** - | * Register a user-defined function with ${i} arguments. + | * Register a deterministic Java UDF$i instance as user-defined function (UDF). 
| * @since $version | */ |def register(name: String, f: UDF$i[$extTypeArgs], returnType: DataType): Unit = { @@ -688,7 +689,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 0 arguments. + * Register a deterministic Java UDF0 instance as user-defined function (UDF). * @since 2.3.0 */ def register(name: String, f: UDF0[_], returnType: DataType): Unit = { @@ -703,7 +704,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 1 arguments. + * Register a deterministic Java UDF1 instance as user-defined function (UDF). * @since 1.3.0 */ def register(name: String, f: UDF1[_, _], returnType: DataType): Unit = { @@ -718,7 +719,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 2 arguments. + * Register a deterministic Java UDF2 instance as user-defined function (UDF). * @since 1.3.0 */ def register(name: String, f: UDF2[_, _, _], returnType: DataType): Unit = { @@ -733,7 +734,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 3 arguments. + * Register a deterministic Java UDF3 instance as user-defined function (UDF). * @since 1.3.0 */ def register(name: String, f: UDF3[_, _, _, _], returnType: DataType): Unit = { @@ -748,7 +749,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 4 arguments. + * Register a deterministic Java UDF4 instance as user-defined function (UDF). * @since 1.3.0 */ def register(name: String, f: UDF4[_, _, _, _, _], returnType: DataType): Unit = { @@ -763,7 +764,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 5 arguments. + * Register a deterministic Java UDF5 instance as user-defined function (UDF). * @since 1.3.0 */ def register(name: String, f: UDF5[_, _, _, _, _, _], returnType: DataType): Unit = { @@ -778,7 +779,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 6 arguments. + * Register a deterministic Java UDF6 instance as user-defined function (UDF). * @since 1.3.0 */ def register(name: String, f: UDF6[_, _, _, _, _, _, _], returnType: DataType): Unit = { @@ -793,7 +794,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 7 arguments. + * Register a deterministic Java UDF7 instance as user-defined function (UDF). * @since 1.3.0 */ def register(name: String, f: UDF7[_, _, _, _, _, _, _, _], returnType: DataType): Unit = { @@ -808,7 +809,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 8 arguments. + * Register a deterministic Java UDF8 instance as user-defined function (UDF). * @since 1.3.0 */ def register(name: String, f: UDF8[_, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { @@ -823,7 +824,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 9 arguments. + * Register a deterministic Java UDF9 instance as user-defined function (UDF). 
* @since 1.3.0 */ def register(name: String, f: UDF9[_, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { @@ -838,7 +839,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 10 arguments. + * Register a deterministic Java UDF10 instance as user-defined function (UDF). * @since 1.3.0 */ def register(name: String, f: UDF10[_, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { @@ -853,7 +854,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 11 arguments. + * Register a deterministic Java UDF11 instance as user-defined function (UDF). * @since 1.3.0 */ def register(name: String, f: UDF11[_, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { @@ -868,7 +869,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 12 arguments. + * Register a deterministic Java UDF12 instance as user-defined function (UDF). * @since 1.3.0 */ def register(name: String, f: UDF12[_, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { @@ -883,7 +884,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 13 arguments. + * Register a deterministic Java UDF13 instance as user-defined function (UDF). * @since 1.3.0 */ def register(name: String, f: UDF13[_, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { @@ -898,7 +899,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 14 arguments. + * Register a deterministic Java UDF14 instance as user-defined function (UDF). * @since 1.3.0 */ def register(name: String, f: UDF14[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { @@ -913,7 +914,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 15 arguments. + * Register a deterministic Java UDF15 instance as user-defined function (UDF). * @since 1.3.0 */ def register(name: String, f: UDF15[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { @@ -928,7 +929,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 16 arguments. + * Register a deterministic Java UDF16 instance as user-defined function (UDF). * @since 1.3.0 */ def register(name: String, f: UDF16[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { @@ -943,7 +944,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 17 arguments. + * Register a deterministic Java UDF17 instance as user-defined function (UDF). * @since 1.3.0 */ def register(name: String, f: UDF17[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { @@ -958,7 +959,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 18 arguments. + * Register a deterministic Java UDF18 instance as user-defined function (UDF). 
* @since 1.3.0 */ def register(name: String, f: UDF18[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { @@ -973,7 +974,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 19 arguments. + * Register a deterministic Java UDF19 instance as user-defined function (UDF). * @since 1.3.0 */ def register(name: String, f: UDF19[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { @@ -988,7 +989,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 20 arguments. + * Register a deterministic Java UDF20 instance as user-defined function (UDF). * @since 1.3.0 */ def register(name: String, f: UDF20[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { @@ -1003,7 +1004,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 21 arguments. + * Register a deterministic Java UDF21 instance as user-defined function (UDF). * @since 1.3.0 */ def register(name: String, f: UDF21[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { @@ -1018,7 +1019,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends } /** - * Register a user-defined function with 22 arguments. + * Register a deterministic Java UDF22 instance as user-defined function (UDF). * @since 1.3.0 */ def register(name: String, f: UDF22[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 9fd76865b5a95..c95212aaae00c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -26,7 +26,7 @@ import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.internal.Logging import org.apache.spark.sql.{Dataset, SparkSession} import org.apache.spark.sql.catalyst.expressions.SubqueryExpression -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Statistics} import org.apache.spark.sql.execution.columnar.InMemoryRelation import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} import org.apache.spark.storage.StorageLevel @@ -94,14 +94,13 @@ class CacheManager extends Logging { logWarning("Asked to cache already cached data.") } else { val sparkSession = query.sparkSession - cachedData.add(CachedData( - planToCache, - InMemoryRelation( - sparkSession.sessionState.conf.useCompression, - sparkSession.sessionState.conf.columnBatchSize, - storageLevel, - sparkSession.sessionState.executePlan(planToCache).executedPlan, - tableName))) + val inMemoryRelation = InMemoryRelation( + sparkSession.sessionState.conf.useCompression, + sparkSession.sessionState.conf.columnBatchSize, storageLevel, + sparkSession.sessionState.executePlan(planToCache).executedPlan, + tableName, + planToCache.stats) + cachedData.add(CachedData(planToCache, inMemoryRelation)) } } @@ -148,7 +147,8 @@ class CacheManager extends Logging { batchSize = cd.cachedRepresentation.batchSize, storageLevel = cd.cachedRepresentation.storageLevel, child = 
spark.sessionState.executePlan(cd.plan).executedPlan, - tableName = cd.cachedRepresentation.tableName) + tableName = cd.cachedRepresentation.tableName, + statsOfPlanToCache = cd.plan.stats) needToRecache += cd.copy(cachedRepresentation = newCache) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala index a9bfb634fbdea..5617046e1396e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala @@ -20,13 +20,13 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} import org.apache.spark.sql.execution.metric.SQLMetrics -import org.apache.spark.sql.execution.vectorized.{ColumnarBatch, ColumnVector} import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector} /** * Helper trait for abstracting scan functionality using - * [[org.apache.spark.sql.execution.vectorized.ColumnarBatch]]es. + * [[ColumnarBatch]]es. */ private[sql] trait ColumnarBatchScan extends CodegenSupport { @@ -68,30 +68,26 @@ private[sql] trait ColumnarBatchScan extends CodegenSupport { */ // TODO: return ColumnarBatch.Rows instead override protected def doProduce(ctx: CodegenContext): String = { - val input = ctx.freshName("input") // PhysicalRDD always just has one input - ctx.addMutableState("scala.collection.Iterator", input, s"$input = inputs[0];") + val input = ctx.addMutableState("scala.collection.Iterator", "input", + v => s"$v = inputs[0];") // metrics val numOutputRows = metricTerm(ctx, "numOutputRows") val scanTimeMetric = metricTerm(ctx, "scanTime") - val scanTimeTotalNs = ctx.freshName("scanTime") - ctx.addMutableState(ctx.JAVA_LONG, scanTimeTotalNs, s"$scanTimeTotalNs = 0;") + val scanTimeTotalNs = ctx.addMutableState(ctx.JAVA_LONG, "scanTime") // init as scanTime = 0 val columnarBatchClz = classOf[ColumnarBatch].getName - val batch = ctx.freshName("batch") - ctx.addMutableState(columnarBatchClz, batch, s"$batch = null;") + val batch = ctx.addMutableState(columnarBatchClz, "batch") - val idx = ctx.freshName("batchIdx") - ctx.addMutableState(ctx.JAVA_INT, idx, s"$idx = 0;") - val colVars = output.indices.map(i => ctx.freshName("colInstance" + i)) + val idx = ctx.addMutableState(ctx.JAVA_INT, "batchIdx") // init as batchIdx = 0 val columnVectorClzs = vectorTypes.getOrElse( - Seq.fill(colVars.size)(classOf[ColumnVector].getName)) - val columnAssigns = colVars.zip(columnVectorClzs).zipWithIndex.map { - case ((name, columnVectorClz), i) => - ctx.addMutableState(columnVectorClz, name, s"$name = null;") - s"$name = ($columnVectorClz) $batch.column($i);" - } + Seq.fill(output.indices.size)(classOf[ColumnVector].getName)) + val (colVars, columnAssigns) = columnVectorClzs.zipWithIndex.map { + case (columnVectorClz, i) => + val name = ctx.addMutableState(columnVectorClz, s"colInstance$i") + (name, s"$name = ($columnVectorClz) $batch.column($i);") + }.unzip val nextBatch = ctx.freshName("nextBatch") val nextBatchFuncName = ctx.addNewFunction(nextBatch, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 1297126b9f47e..bd286abd7b027 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -69,7 +69,7 @@ trait DataSourceScanExec extends LeafExecNode with CodegenSupport { * Shorthand for calling redactString() without specifying redacting rules */ private def redact(text: String): String = { - Utils.redact(SparkSession.getActiveSession.map(_.sparkContext.conf).orNull, text) + Utils.redact(sqlContext.sessionState.conf.stringRedationPattern, text) } } @@ -110,8 +110,7 @@ case class RowDataSourceScanExec( override protected def doProduce(ctx: CodegenContext): String = { val numOutputRows = metricTerm(ctx, "numOutputRows") // PhysicalRDD always just has one input - val input = ctx.freshName("input") - ctx.addMutableState("scala.collection.Iterator", input, s"$input = inputs[0];") + val input = ctx.addMutableState("scala.collection.Iterator", "input", v => s"$v = inputs[0];") val exprRows = output.zipWithIndex.map{ case (a, i) => BoundReference(i, a.dataType, a.nullable) } @@ -353,8 +352,7 @@ case class FileSourceScanExec( } val numOutputRows = metricTerm(ctx, "numOutputRows") // PhysicalRDD always just has one input - val input = ctx.freshName("input") - ctx.addMutableState("scala.collection.Iterator", input, s"$input = inputs[0];") + val input = ctx.addMutableState("scala.collection.Iterator", "input", v => s"$v = inputs[0];") val row = ctx.freshName("row") ctx.INPUT_ROW = row diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala index e1562befe14f9..0c2c4a1a9100d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala @@ -47,8 +47,7 @@ private[execution] sealed case class LazyIterator(func: () => TraversableOnce[In * terminate(). * * @param generator the generator expression - * @param join when true, each output row is implicitly joined with the input tuple that produced - * it. + * @param requiredChildOutput required attributes from child's output * @param outer when true, each input row will be output at least once, even if the output of the * given `generator` is empty. 
* @param generatorOutput the qualified output attributes of the generator of this node, which @@ -57,19 +56,13 @@ private[execution] sealed case class LazyIterator(func: () => TraversableOnce[In */ case class GenerateExec( generator: Generator, - join: Boolean, + requiredChildOutput: Seq[Attribute], outer: Boolean, generatorOutput: Seq[Attribute], child: SparkPlan) extends UnaryExecNode with CodegenSupport { - override def output: Seq[Attribute] = { - if (join) { - child.output ++ generatorOutput - } else { - generatorOutput - } - } + override def output: Seq[Attribute] = requiredChildOutput ++ generatorOutput override lazy val metrics = Map( "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) @@ -85,11 +78,19 @@ case class GenerateExec( val numOutputRows = longMetric("numOutputRows") child.execute().mapPartitionsWithIndexInternal { (index, iter) => val generatorNullRow = new GenericInternalRow(generator.elementSchema.length) - val rows = if (join) { + val rows = if (requiredChildOutput.nonEmpty) { + + val pruneChildForResult: InternalRow => InternalRow = + if (child.outputSet == AttributeSet(requiredChildOutput)) { + identity + } else { + UnsafeProjection.create(requiredChildOutput, child.output) + } + val joinedRow = new JoinedRow iter.flatMap { row => - // we should always set the left (child output) - joinedRow.withLeft(row) + // we should always set the left (required child output) + joinedRow.withLeft(pruneChildForResult(row)) val outputRows = boundGenerator.eval(row) if (outer && outputRows.isEmpty) { joinedRow.withRight(generatorNullRow) :: Nil @@ -136,7 +137,7 @@ case class GenerateExec( override def doConsume(ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = { // Add input rows to the values when we are joining - val values = if (join) { + val values = if (requiredChildOutput.nonEmpty) { input } else { Seq.empty diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index f404621399cea..8bfe3eff0c3b3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -29,7 +29,6 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.command.{DescribeTableCommand, ExecutedCommandExec, ShowTablesCommand} import org.apache.spark.sql.execution.exchange.{EnsureRequirements, ReuseExchange} -import org.apache.spark.sql.execution.joins.ReorderJoinPredicates import org.apache.spark.sql.types.{BinaryType, DateType, DecimalType, TimestampType, _} import org.apache.spark.util.Utils @@ -104,7 +103,6 @@ class QueryExecution(val sparkSession: SparkSession, val logical: LogicalPlan) { protected def preparations: Seq[Rule[SparkPlan]] = Seq( python.ExtractPythonUDFs, PlanSubqueries(sparkSession), - new ReorderJoinPredicates, EnsureRequirements(sparkSession.sessionState.conf), CollapseCodegenStages(sparkSession.sessionState.conf), ReuseExchange(sparkSession.sessionState.conf), @@ -196,13 +194,13 @@ class QueryExecution(val sparkSession: SparkSession, val logical: LogicalPlan) { } } - def simpleString: String = { + def simpleString: String = withRedaction { s"""== Physical Plan == |${stringOrError(executedPlan.treeString(verbose = false))} """.stripMargin.trim } - override def toString: String = { + override def toString: String = withRedaction { 
def output = Utils.truncatedString( analyzed.output.map(o => s"${o.name}: ${o.dataType.simpleString}"), ", ") val analyzedPlan = Seq( @@ -221,7 +219,7 @@ class QueryExecution(val sparkSession: SparkSession, val logical: LogicalPlan) { """.stripMargin.trim } - def stringWithStats: String = { + def stringWithStats: String = withRedaction { // trigger to compute stats for logical plans optimizedPlan.stats @@ -233,6 +231,13 @@ class QueryExecution(val sparkSession: SparkSession, val logical: LogicalPlan) { """.stripMargin.trim } + /** + * Redact the sensitive information in the given string. + */ + private def withRedaction(message: String): String = { + Utils.redact(sparkSession.sessionState.conf.stringRedationPattern, message) + } + /** A special namespace for commands that can be used to debug query execution. */ // scalastyle:off object debug { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala index c0e21343ae623..ef1bb1c2a4468 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala @@ -133,20 +133,18 @@ case class SortExec( override def needStopCheck: Boolean = false override protected def doProduce(ctx: CodegenContext): String = { - val needToSort = ctx.freshName("needToSort") - ctx.addMutableState(ctx.JAVA_BOOLEAN, needToSort, s"$needToSort = true;") + val needToSort = ctx.addMutableState(ctx.JAVA_BOOLEAN, "needToSort", v => s"$v = true;") // Initialize the class member variables. This includes the instance of the Sorter and // the iterator to return sorted rows. val thisPlan = ctx.addReferenceObj("plan", this) - sorterVariable = ctx.freshName("sorter") - ctx.addMutableState(classOf[UnsafeExternalRowSorter].getName, sorterVariable, - s"$sorterVariable = $thisPlan.createSorter();") - val metrics = ctx.freshName("metrics") - ctx.addMutableState(classOf[TaskMetrics].getName, metrics, - s"$metrics = org.apache.spark.TaskContext.get().taskMetrics();") - val sortedIterator = ctx.freshName("sortedIter") - ctx.addMutableState("scala.collection.Iterator", sortedIterator, "") + // Inline mutable state since not many Sort operations in a task + sorterVariable = ctx.addMutableState(classOf[UnsafeExternalRowSorter].getName, "sorter", + v => s"$v = $thisPlan.createSorter();", forceInline = true) + val metrics = ctx.addMutableState(classOf[TaskMetrics].getName, "metrics", + v => s"$v = org.apache.spark.TaskContext.get().taskMetrics();", forceInline = true) + val sortedIterator = ctx.addMutableState("scala.collection.Iterator", "sortedIter", + forceInline = true) val addToSorter = ctx.freshName("addToSorter") val addToSorterFuncName = ctx.addNewFunction(addToSorter, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala index 657b265260135..398758a3331b4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala @@ -23,7 +23,7 @@ import scala.collection.mutable.ArrayBuffer import scala.concurrent.ExecutionContext import org.codehaus.commons.compiler.CompileException -import org.codehaus.janino.JaninoRuntimeException +import org.codehaus.janino.InternalCompilerException import org.apache.spark.{broadcast, SparkEnv} import org.apache.spark.internal.Logging @@ -94,7 +94,21 @@ abstract class SparkPlan 
extends QueryPlan[SparkPlan] with Logging with Serializ /** Specifies how data is partitioned across different nodes in the cluster. */ def outputPartitioning: Partitioning = UnknownPartitioning(0) // TODO: WRONG WIDTH! - /** Specifies any partition requirements on the input data for this operator. */ + /** + * Specifies the data distribution requirements of all the children for this operator. By default + * it's [[UnspecifiedDistribution]] for each child, which means each child can have any + * distribution. + * + * If an operator overrides this method and specifies distribution requirements (excluding + * [[UnspecifiedDistribution]] and [[BroadcastDistribution]]) for more than one child, Spark + * guarantees that the outputs of these children will have the same number of partitions, so that + * the operator can safely zip partitions of these children's result RDDs. Some operators can + * leverage this guarantee to satisfy interesting requirements, e.g., a non-broadcast join can + * specify HashClusteredDistribution(a,b) for its left child and HashClusteredDistribution(c,d) + * for its right child; it is then guaranteed that the left and right children are co-partitioned + * by a,b/c,d, which means tuples with the same values sit in partitions with the same index, + * e.g., (a=1,b=2) and (c=1,d=2) are both in the second partition of the left and right children. + */ def requiredChildDistribution: Seq[Distribution] = Seq.fill(children.size)(UnspecifiedDistribution) @@ -337,8 +351,9 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ if (buf.isEmpty) { numPartsToTry = partsScanned * limitScaleUpFactor } else { - // the left side of max is >=1 whenever partsScanned >= 2 - numPartsToTry = Math.max((1.5 * n * partsScanned / buf.size).toInt - partsScanned, 1) + val left = n - buf.size + // As left > 0, numPartsToTry is always >= 1 + numPartsToTry = Math.ceil(1.5 * left * partsScanned / buf.size).toInt numPartsToTry = Math.min(numPartsToTry, partsScanned * limitScaleUpFactor) } } @@ -385,7 +400,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ try { GeneratePredicate.generate(expression, inputSchema) } catch { - case _ @ (_: JaninoRuntimeException | _: CompileException) if codeGenFallBack => + case _ @ (_: InternalCompilerException | _: CompileException) if codeGenFallBack => genInterpretedPredicate(expression, inputSchema) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 29b584b55972c..d3cfd2a1ffbf2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -383,16 +383,19 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { * {{{ * CREATE [TEMPORARY] TABLE [IF NOT EXISTS] [db_name.]table_name * USING table_provider - * [OPTIONS table_property_list] - * [PARTITIONED BY (col_name, col_name, ...)] - * [CLUSTERED BY (col_name, col_name, ...) - * [SORTED BY (col_name [ASC|DESC], ...)] - * INTO num_buckets BUCKETS - * ] - * [LOCATION path] - * [COMMENT table_comment] - * [TBLPROPERTIES (property_name=property_value, ...)] + * create_table_clauses * [[AS] select_statement]; + * + * create_table_clauses (order insensitive): + * [OPTIONS table_property_list] + * [PARTITIONED BY (col_name, col_name, ...)] + * [CLUSTERED BY (col_name, col_name, ...)
+ * [SORTED BY (col_name [ASC|DESC], ...)] + * INTO num_buckets BUCKETS + * ] + * [LOCATION path] + * [COMMENT table_comment] + * [TBLPROPERTIES (property_name=property_value, ...)] * }}} */ override def visitCreateTable(ctx: CreateTableContext): LogicalPlan = withOrigin(ctx) { @@ -400,6 +403,14 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { if (external) { operationNotAllowed("CREATE EXTERNAL TABLE ... USING", ctx) } + + checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx) + checkDuplicateClauses(ctx.OPTIONS, "OPTIONS", ctx) + checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED BY", ctx) + checkDuplicateClauses(ctx.COMMENT, "COMMENT", ctx) + checkDuplicateClauses(ctx.bucketSpec(), "CLUSTERED BY", ctx) + checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx) + val options = Option(ctx.options).map(visitPropertyKeyValues).getOrElse(Map.empty) val provider = ctx.tableProvider.qualifiedName.getText val schema = Option(ctx.colTypeList()).map(createSchema) @@ -408,9 +419,9 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { .map(visitIdentifierList(_).toArray) .getOrElse(Array.empty[String]) val properties = Option(ctx.tableProps).map(visitPropertyKeyValues).getOrElse(Map.empty) - val bucketSpec = Option(ctx.bucketSpec()).map(visitBucketSpec) + val bucketSpec = ctx.bucketSpec().asScala.headOption.map(visitBucketSpec) - val location = Option(ctx.locationSpec).map(visitLocationSpec) + val location = ctx.locationSpec.asScala.headOption.map(visitLocationSpec) val storage = DataSource.buildStorageFormatFromOptions(options) if (location.isDefined && storage.locationUri.isDefined) { @@ -1087,13 +1098,16 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { * {{{ * CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name * [(col1[:] data_type [COMMENT col_comment], ...)] - * [COMMENT table_comment] - * [PARTITIONED BY (col2[:] data_type [COMMENT col_comment], ...)] - * [ROW FORMAT row_format] - * [STORED AS file_format] - * [LOCATION path] - * [TBLPROPERTIES (property_name=property_value, ...)] + * create_table_clauses * [AS select_statement]; + * + * create_table_clauses (order insensitive): + * [COMMENT table_comment] + * [PARTITIONED BY (col2[:] data_type [COMMENT col_comment], ...)] + * [ROW FORMAT row_format] + * [STORED AS file_format] + * [LOCATION path] + * [TBLPROPERTIES (property_name=property_value, ...)] * }}} */ override def visitCreateHiveTable(ctx: CreateHiveTableContext): LogicalPlan = withOrigin(ctx) { @@ -1104,15 +1118,23 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { "CREATE TEMPORARY TABLE is not supported yet. " + "Please use CREATE TEMPORARY VIEW as an alternative.", ctx) } - if (ctx.skewSpec != null) { + if (ctx.skewSpec.size > 0) { operationNotAllowed("CREATE TABLE ... 
SKEWED BY", ctx) } + checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx) + checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED BY", ctx) + checkDuplicateClauses(ctx.COMMENT, "COMMENT", ctx) + checkDuplicateClauses(ctx.bucketSpec(), "CLUSTERED BY", ctx) + checkDuplicateClauses(ctx.createFileFormat, "STORED AS/BY", ctx) + checkDuplicateClauses(ctx.rowFormat, "ROW FORMAT", ctx) + checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx) + val dataCols = Option(ctx.columns).map(visitColTypeList).getOrElse(Nil) val partitionCols = Option(ctx.partitionColumns).map(visitColTypeList).getOrElse(Nil) - val properties = Option(ctx.tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty) + val properties = Option(ctx.tableProps).map(visitPropertyKeyValues).getOrElse(Map.empty) val selectQuery = Option(ctx.query).map(plan) - val bucketSpec = Option(ctx.bucketSpec()).map(visitBucketSpec) + val bucketSpec = ctx.bucketSpec().asScala.headOption.map(visitBucketSpec) // Note: Hive requires partition columns to be distinct from the schema, so we need // to include the partition columns here explicitly @@ -1120,12 +1142,12 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { // Storage format val defaultStorage = HiveSerDe.getDefaultStorage(conf) - validateRowFormatFileFormat(ctx.rowFormat, ctx.createFileFormat, ctx) - val fileStorage = Option(ctx.createFileFormat).map(visitCreateFileFormat) + validateRowFormatFileFormat(ctx.rowFormat.asScala, ctx.createFileFormat.asScala, ctx) + val fileStorage = ctx.createFileFormat.asScala.headOption.map(visitCreateFileFormat) .getOrElse(CatalogStorageFormat.empty) - val rowStorage = Option(ctx.rowFormat).map(visitRowFormat) + val rowStorage = ctx.rowFormat.asScala.headOption.map(visitRowFormat) .getOrElse(CatalogStorageFormat.empty) - val location = Option(ctx.locationSpec).map(visitLocationSpec) + val location = ctx.locationSpec.asScala.headOption.map(visitLocationSpec) // If we are creating an EXTERNAL table, then the LOCATION field is required if (external && location.isEmpty) { operationNotAllowed("CREATE EXTERNAL TABLE must be accompanied by LOCATION", ctx) @@ -1180,7 +1202,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { ctx) } - val hasStorageProperties = (ctx.createFileFormat != null) || (ctx.rowFormat != null) + val hasStorageProperties = (ctx.createFileFormat.size != 0) || (ctx.rowFormat.size != 0) if (conf.convertCTAS && !hasStorageProperties) { // At here, both rowStorage.serdeProperties and fileStorage.serdeProperties // are empty Maps. @@ -1366,6 +1388,15 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { } } + private def validateRowFormatFileFormat( + rowFormatCtx: Seq[RowFormatContext], + createFileFormatCtx: Seq[CreateFileFormatContext], + parentCtx: ParserRuleContext): Unit = { + if (rowFormatCtx.size == 1 && createFileFormatCtx.size == 1) { + validateRowFormatFileFormat(rowFormatCtx.head, createFileFormatCtx.head, parentCtx) + } + } + /** * Create or replace a view. This creates a [[CreateViewCommand]] command. 
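A quick sketch of what the new checkDuplicateClauses guards above buy us, assuming a SparkSession named `spark`; the create-table clauses may now appear in any order, but repeating one is rejected at parse time (the exact error text may differ):

    // Order-insensitive clauses are accepted:
    spark.sql(
      """CREATE TABLE demo (id INT) USING parquet
        |TBLPROPERTIES ('k' = 'v')
        |COMMENT 'clauses in any order'""".stripMargin)

    // Repeating a clause is now a parse error, e.g.
    //   CREATE TABLE demo2 (id INT) USING parquet COMMENT 'a' COMMENT 'b'
    // is expected to fail with something like "Found duplicate clauses: COMMENT".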
* diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 19b858faba6ea..910294853c318 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -29,10 +29,12 @@ import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution.columnar.{InMemoryRelation, InMemoryTableScanExec} import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec -import org.apache.spark.sql.execution.joins.{BuildLeft, BuildRight} +import org.apache.spark.sql.execution.joins.{BuildLeft, BuildRight, BuildSide} import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.execution.streaming.sources.MemoryPlanV2 import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.StreamingQuery +import org.apache.spark.sql.types.StructType /** * Converts a logical plan into zero or more SparkPlans. This API is exposed for experimenting @@ -91,12 +93,12 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { * predicates can be evaluated by matching join keys. If found, Join implementations are chosen * with the following precedence: * - * - Broadcast: if one side of the join has an estimated physical size that is smaller than the - * user-configurable [[SQLConf.AUTO_BROADCASTJOIN_THRESHOLD]] threshold - * or if that side has an explicit broadcast hint (e.g. the user applied the - * [[org.apache.spark.sql.functions.broadcast()]] function to a DataFrame), then that side - * of the join will be broadcasted and the other side will be streamed, with no shuffling - * performed. If both sides of the join are eligible to be broadcasted then the + * - Broadcast: we prefer to broadcast the join side with an explicit broadcast hint (e.g. the + * user applied the [[org.apache.spark.sql.functions.broadcast()]] function to a DataFrame). + * If both sides have the broadcast hint, we prefer to broadcast the side with the smaller + * estimated physical size. If neither side has the broadcast hint, + * we only broadcast the join side if its estimated physical size is smaller than + * the user-configurable [[SQLConf.AUTO_BROADCASTJOIN_THRESHOLD]] threshold. * - Shuffle hash join: if the average size of a single partition is small enough to build a hash * table. * - Sort merge: if the matching join keys are sortable. @@ -112,9 +114,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { * Matches a plan whose output should be small enough to be used in broadcast join.
*/ private def canBroadcast(plan: LogicalPlan): Boolean = { - plan.stats.hints.broadcast || - (plan.stats.sizeInBytes >= 0 && - plan.stats.sizeInBytes <= conf.autoBroadcastJoinThreshold) + plan.stats.sizeInBytes >= 0 && plan.stats.sizeInBytes <= conf.autoBroadcastJoinThreshold } /** @@ -149,19 +149,74 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case _ => false } + private def broadcastSide( + canBuildLeft: Boolean, + canBuildRight: Boolean, + left: LogicalPlan, + right: LogicalPlan): BuildSide = { + + def smallerSide = + if (right.stats.sizeInBytes <= left.stats.sizeInBytes) BuildRight else BuildLeft + + if (canBuildRight && canBuildLeft) { + // Broadcast the smaller side, based on its estimated physical size, + // if both sides have the broadcast hint + smallerSide + } else if (canBuildRight) { + BuildRight + } else if (canBuildLeft) { + BuildLeft + } else { + // fallback for the default broadcast nested loop join case + smallerSide + } + } + + private def canBroadcastByHints(joinType: JoinType, left: LogicalPlan, right: LogicalPlan) + : Boolean = { + val buildLeft = canBuildLeft(joinType) && left.stats.hints.broadcast + val buildRight = canBuildRight(joinType) && right.stats.hints.broadcast + buildLeft || buildRight + } + + private def broadcastSideByHints(joinType: JoinType, left: LogicalPlan, right: LogicalPlan) + : BuildSide = { + val buildLeft = canBuildLeft(joinType) && left.stats.hints.broadcast + val buildRight = canBuildRight(joinType) && right.stats.hints.broadcast + broadcastSide(buildLeft, buildRight, left, right) + } + + private def canBroadcastBySizes(joinType: JoinType, left: LogicalPlan, right: LogicalPlan) + : Boolean = { + val buildLeft = canBuildLeft(joinType) && canBroadcast(left) + val buildRight = canBuildRight(joinType) && canBroadcast(right) + buildLeft || buildRight + } + + private def broadcastSideBySizes(joinType: JoinType, left: LogicalPlan, right: LogicalPlan) + : BuildSide = { + val buildLeft = canBuildLeft(joinType) && canBroadcast(left) + val buildRight = canBuildRight(joinType) && canBroadcast(right) + broadcastSide(buildLeft, buildRight, left, right) + } + def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { // --- BroadcastHashJoin -------------------------------------------------------------------- + // broadcast hints were specified case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, condition, left, right) - if canBuildRight(joinType) && canBroadcast(right) => + if canBroadcastByHints(joinType, left, right) => + val buildSide = broadcastSideByHints(joinType, left, right) Seq(joins.BroadcastHashJoinExec( - leftKeys, rightKeys, joinType, BuildRight, condition, planLater(left), planLater(right))) + leftKeys, rightKeys, joinType, buildSide, condition, planLater(left), planLater(right))) + // broadcast hints were not specified, so we need to infer it from size and configuration.
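A minimal sketch of the precedence these helpers encode, assuming a SparkSession `spark`: an explicit hint wins outright, sizes break ties when both sides are hinted, and sizes decide only when no hint is present.

    import org.apache.spark.sql.functions.broadcast

    val small = spark.range(100).toDF("id")
    val large = spark.range(1000000).toDF("id")

    // The hinted side is broadcast regardless of the size threshold:
    large.join(broadcast(small), "id").explain()

    // With no hint, a side is broadcast only if its estimated physical size
    // is below the user-configurable threshold:
    spark.conf.set("spark.sql.autoBroadcastJoinThreshold", 10L * 1024 * 1024)
    large.join(small, "id").explain()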
case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, condition, left, right) - if canBuildLeft(joinType) && canBroadcast(left) => + if canBroadcastBySizes(joinType, left, right) => + val buildSide = broadcastSideBySizes(joinType, left, right) Seq(joins.BroadcastHashJoinExec( - leftKeys, rightKeys, joinType, BuildLeft, condition, planLater(left), planLater(right))) + leftKeys, rightKeys, joinType, buildSide, condition, planLater(left), planLater(right))) // --- ShuffledHashJoin --------------------------------------------------------------------- @@ -190,25 +245,24 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { // Pick BroadcastNestedLoopJoin if one side could be broadcasted case j @ logical.Join(left, right, joinType, condition) - if canBuildRight(joinType) && canBroadcast(right) => + if canBroadcastByHints(joinType, left, right) => + val buildSide = broadcastSideByHints(joinType, left, right) joins.BroadcastNestedLoopJoinExec( - planLater(left), planLater(right), BuildRight, joinType, condition) :: Nil + planLater(left), planLater(right), buildSide, joinType, condition) :: Nil + case j @ logical.Join(left, right, joinType, condition) - if canBuildLeft(joinType) && canBroadcast(left) => + if canBroadcastBySizes(joinType, left, right) => + val buildSide = broadcastSideBySizes(joinType, left, right) joins.BroadcastNestedLoopJoinExec( - planLater(left), planLater(right), BuildLeft, joinType, condition) :: Nil + planLater(left), planLater(right), buildSide, joinType, condition) :: Nil // Pick CartesianProduct for InnerJoin case logical.Join(left, right, _: InnerLike, condition) => joins.CartesianProductExec(planLater(left), planLater(right), condition) :: Nil case logical.Join(left, right, joinType, condition) => - val buildSide = - if (right.stats.sizeInBytes <= left.stats.sizeInBytes) { - BuildRight - } else { - BuildLeft - } + val buildSide = broadcastSide( + left.stats.hints.broadcast, right.stats.hints.broadcast, left, right) // This join could be very slow or OOM joins.BroadcastNestedLoopJoinExec( planLater(left), planLater(right), buildSide, joinType, condition) :: Nil @@ -339,6 +393,8 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { StreamingRelationExec(s.sourceName, s.output) :: Nil case s: StreamingExecutionRelation => StreamingRelationExec(s.toString, s.output) :: Nil + case s: StreamingRelationV2 => + StreamingRelationExec(s.sourceName, s.output) :: Nil case _ => Nil } } @@ -364,11 +420,15 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { // Can we automate these 'pass through' operations? 
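In the BasicOperators hunk below, RepartitionByExpression now hands its own `partitioning` to the exchange instead of a hardcoded HashPartitioning, presumably so that non-hash partitionings can reuse the same rule. A sketch, assuming a SparkSession `spark` with implicits imported:

    import spark.implicits._

    val df = spark.range(1000).toDF("id")
    df.repartition(8, $"id")           // hash partitioning, as before
    df.repartitionByRange(8, $"id")    // range partitioning can now flow through the same rule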
object BasicOperators extends Strategy { def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { + case d: DataWritingCommand => DataWritingCommandExec(d, planLater(d.query)) :: Nil case r: RunnableCommand => ExecutedCommandExec(r) :: Nil case MemoryPlan(sink, output) => val encoder = RowEncoder(sink.schema) LocalTableScanExec(output, sink.allData.map(r => encoder.toRow(r).copy())) :: Nil + case MemoryPlanV2(sink, output) => + val encoder = RowEncoder(StructType.fromAttributes(output)) + LocalTableScanExec(output, sink.allData.map(r => encoder.toRow(r).copy())) :: Nil case logical.Distinct(child) => throw new IllegalStateException( @@ -439,17 +499,16 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { execution.GlobalLimitExec(limit, planLater(child)) :: Nil case logical.Union(unionChildren) => execution.UnionExec(unionChildren.map(planLater)) :: Nil - case g @ logical.Generate(generator, join, outer, _, _, child) => + case g @ logical.Generate(generator, _, outer, _, _, child) => execution.GenerateExec( - generator, join = join, outer = outer, g.qualifiedGeneratorOutput, - planLater(child)) :: Nil + generator, g.requiredChildOutput, outer, + g.qualifiedGeneratorOutput, planLater(child)) :: Nil case _: logical.OneRowRelation => execution.RDDScanExec(Nil, singleRowRdd, "OneRowRelation") :: Nil case r: logical.Range => execution.RangeExec(r) :: Nil - case logical.RepartitionByExpression(expressions, child, numPartitions) => - exchange.ShuffleExchangeExec(HashPartitioning( - expressions, numPartitions), planLater(child)) :: Nil + case r: logical.RepartitionByExpression => + exchange.ShuffleExchangeExec(r.partitioning, planLater(r.child)) :: Nil case ExternalRDD(outputObjAttr, rdd) => ExternalRDDScanExec(outputObjAttr, rdd) :: Nil case r: LogicalRDD => RDDScanExec(r.output, r.rdd, "ExistingRDD", r.outputPartitioning, r.outputOrdering) :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index 7166b7771e4db..065954559e487 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -282,9 +282,10 @@ case class InputAdapter(child: SparkPlan) extends UnaryExecNode with CodegenSupp } override def doProduce(ctx: CodegenContext): String = { - val input = ctx.freshName("input") // Right now, InputAdapter is only used when there is one input RDD. 
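The InputAdapter hunk just below shows the mutable-state refactor this patch applies across operators. A sketch of the pattern, with the new `addMutableState` signature inferred from these hunks; it assumes Spark's internal `CodegenContext` is on the classpath and is not part of the patch:

    import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext

    // The context now owns field naming and returns the actual term, replacing
    // the old freshName-then-register pairing. `forceInline` keeps hot,
    // once-per-task state as a plain field rather than a compacted array slot.
    def declareInputIterator(ctx: CodegenContext): String =
      ctx.addMutableState("scala.collection.Iterator", "input",
        v => s"$v = inputs[0];", forceInline = true)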
- ctx.addMutableState("scala.collection.Iterator", input, s"$input = inputs[0];") + // Inline mutable state since an InputAdapter is used once in a task for WholeStageCodegen + val input = ctx.addMutableState("scala.collection.Iterator", "input", v => s"$v = inputs[0];", + forceInline = true) val row = ctx.freshName("row") s""" | while ($input.hasNext() && !stopEarly()) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala index dc8aecf185a96..ce3c68810f3b6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} +import org.apache.spark.sql.execution.vectorized.MutableColumnarRow import org.apache.spark.sql.types.{DecimalType, StringType, StructType} import org.apache.spark.unsafe.KVIterator import org.apache.spark.util.Utils @@ -177,8 +178,7 @@ case class HashAggregateExec( private var bufVars: Seq[ExprCode] = _ private def doProduceWithoutKeys(ctx: CodegenContext): String = { - val initAgg = ctx.freshName("initAgg") - ctx.addMutableState(ctx.JAVA_BOOLEAN, initAgg, s"$initAgg = false;") + val initAgg = ctx.addMutableState(ctx.JAVA_BOOLEAN, "initAgg") // The generated function doesn't have input row in the code context. ctx.INPUT_ROW = null @@ -186,10 +186,8 @@ case class HashAggregateExec( val functions = aggregateExpressions.map(_.aggregateFunction.asInstanceOf[DeclarativeAggregate]) val initExpr = functions.flatMap(f => f.initialValues) bufVars = initExpr.map { e => - val isNull = ctx.freshName("bufIsNull") - val value = ctx.freshName("bufValue") - ctx.addMutableState(ctx.JAVA_BOOLEAN, isNull) - ctx.addMutableState(ctx.javaType(e.dataType), value) + val isNull = ctx.addMutableState(ctx.JAVA_BOOLEAN, "bufIsNull") + val value = ctx.addMutableState(ctx.javaType(e.dataType), "bufValue") // The initial expression should not access any column val ev = e.genCode(ctx) val initVars = s""" @@ -443,6 +441,7 @@ case class HashAggregateExec( val funcName = ctx.freshName("doAggregateWithKeysOutput") val keyTerm = ctx.freshName("keyTerm") val bufferTerm = ctx.freshName("bufferTerm") + val numOutput = metricTerm(ctx, "numOutputRows") val body = if (modes.contains(Final) || modes.contains(Complete)) { @@ -519,6 +518,7 @@ case class HashAggregateExec( s""" private void $funcName(UnsafeRow $keyTerm, UnsafeRow $bufferTerm) throws java.io.IOException { + $numOutput.add(1); $body } """) @@ -548,7 +548,7 @@ case class HashAggregateExec( isSupported && isNotByteArrayDecimalType } - private def enableTwoLevelHashMap(ctx: CodegenContext) = { + private def enableTwoLevelHashMap(ctx: CodegenContext): Unit = { if (!checkIfFastHashMapSupported(ctx)) { if (modes.forall(mode => mode == Partial || mode == PartialMerge) && !Utils.isTesting) { logInfo("spark.sql.codegen.aggregate.map.twolevel.enabled is set to true, but" @@ -559,107 +559,96 @@ case class HashAggregateExec( // This is for testing/benchmarking only. // We enforce to first level to be a vectorized hashmap, instead of the default row-based one. 
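The two knobs referenced above, exercised in a short sketch; both config keys are taken verbatim from this hunk, both are internal/testing options, and a SparkSession `spark` is assumed:

    import spark.implicits._

    spark.conf.set("spark.sql.codegen.aggregate.map.twolevel.enabled", "true")
    spark.conf.set("spark.sql.codegen.aggregate.map.vectorized.enable", "true")

    // A partial aggregate whose first level can use the fast vectorized map:
    spark.range(100000).select(($"id" % 10).as("k")).groupBy($"k").count().show()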
- sqlContext.getConf("spark.sql.codegen.aggregate.map.vectorized.enable", null) match { - case "true" => isVectorizedHashMapEnabled = true - case null | "" | "false" => None } + isVectorizedHashMapEnabled = sqlContext.getConf( + "spark.sql.codegen.aggregate.map.vectorized.enable", "false") == "true" } } private def doProduceWithKeys(ctx: CodegenContext): String = { - val initAgg = ctx.freshName("initAgg") - ctx.addMutableState(ctx.JAVA_BOOLEAN, initAgg, s"$initAgg = false;") + val initAgg = ctx.addMutableState(ctx.JAVA_BOOLEAN, "initAgg") if (sqlContext.conf.enableTwoLevelAggMap) { enableTwoLevelHashMap(ctx) } else { sqlContext.getConf("spark.sql.codegen.aggregate.map.vectorized.enable", null) match { - case "true" => logWarning("Two level hashmap is disabled but vectorized hashmap is " + - "enabled.") - case null | "" | "false" => None + case "true" => + logWarning("Two level hashmap is disabled but vectorized hashmap is enabled.") + case _ => } } - fastHashMapTerm = ctx.freshName("fastHashMap") - val fastHashMapClassName = ctx.freshName("FastHashMap") - val fastHashMapGenerator = - if (isVectorizedHashMapEnabled) { - new VectorizedHashMapGenerator(ctx, aggregateExpressions, - fastHashMapClassName, groupingKeySchema, bufferSchema) - } else { - new RowBasedHashMapGenerator(ctx, aggregateExpressions, - fastHashMapClassName, groupingKeySchema, bufferSchema) - } val thisPlan = ctx.addReferenceObj("plan", this) - // Create a name for iterator from vectorized HashMap - val iterTermForFastHashMap = ctx.freshName("fastHashMapIter") - if (isFastHashMapEnabled) { + // Create a name for the iterator from the fast hash map. + val iterTermForFastHashMap = if (isFastHashMapEnabled) { + // Generates the fast hash map class and creates the fast hash map term. + val fastHashMapClassName = ctx.freshName("FastHashMap") if (isVectorizedHashMapEnabled) { - ctx.addMutableState(fastHashMapClassName, fastHashMapTerm, - s"$fastHashMapTerm = new $fastHashMapClassName();") - ctx.addMutableState( - "java.util.Iterator", - iterTermForFastHashMap) + val generatedMap = new VectorizedHashMapGenerator(ctx, aggregateExpressions, + fastHashMapClassName, groupingKeySchema, bufferSchema).generate() + ctx.addInnerClass(generatedMap) + + // Inline mutable state since not many aggregation operations in a task + fastHashMapTerm = ctx.addMutableState(fastHashMapClassName, "vectorizedFastHashMap", + v => s"$v = new $fastHashMapClassName();", forceInline = true) + ctx.addMutableState(s"java.util.Iterator", "vectorizedFastHashMapIter", + forceInline = true) } else { - ctx.addMutableState(fastHashMapClassName, fastHashMapTerm, - s"$fastHashMapTerm = new $fastHashMapClassName(" + - s"$thisPlan.getTaskMemoryManager(), $thisPlan.getEmptyAggregationBuffer());") + val generatedMap = new RowBasedHashMapGenerator(ctx, aggregateExpressions, + fastHashMapClassName, groupingKeySchema, bufferSchema).generate() + ctx.addInnerClass(generatedMap) + + // Inline mutable state since not many aggregation operations in a task + fastHashMapTerm = ctx.addMutableState(fastHashMapClassName, "fastHashMap", + v => s"$v = new $fastHashMapClassName(" + + s"$thisPlan.getTaskMemoryManager(), $thisPlan.getEmptyAggregationBuffer());", + forceInline = true) ctx.addMutableState( - "org.apache.spark.unsafe.KVIterator", - iterTermForFastHashMap) + "org.apache.spark.unsafe.KVIterator", + "fastHashMapIter", forceInline = true) } } + // Create a name for the iterator from the regular hash map.
+ // Inline mutable state since not many aggregation operations in a task + val iterTerm = ctx.addMutableState(classOf[KVIterator[UnsafeRow, UnsafeRow]].getName, + "mapIter", forceInline = true) // create hashMap - hashMapTerm = ctx.freshName("hashMap") val hashMapClassName = classOf[UnsafeFixedWidthAggregationMap].getName - ctx.addMutableState(hashMapClassName, hashMapTerm) - sorterTerm = ctx.freshName("sorter") - ctx.addMutableState(classOf[UnsafeKVExternalSorter].getName, sorterTerm) - - // Create a name for iterator from HashMap - val iterTerm = ctx.freshName("mapIter") - ctx.addMutableState(classOf[KVIterator[UnsafeRow, UnsafeRow]].getName, iterTerm) - - def generateGenerateCode(): String = { - if (isFastHashMapEnabled) { - if (isVectorizedHashMapEnabled) { - s""" - | ${fastHashMapGenerator.asInstanceOf[VectorizedHashMapGenerator].generate()} - """.stripMargin - } else { - s""" - | ${fastHashMapGenerator.asInstanceOf[RowBasedHashMapGenerator].generate()} - """.stripMargin - } - } else "" - } - ctx.addInnerClass(generateGenerateCode()) + hashMapTerm = ctx.addMutableState(hashMapClassName, "hashMap", + v => s"$v = $thisPlan.createHashMap();", forceInline = true) + sorterTerm = ctx.addMutableState(classOf[UnsafeKVExternalSorter].getName, "sorter", + forceInline = true) val doAgg = ctx.freshName("doAggregateWithKeys") val peakMemory = metricTerm(ctx, "peakMemory") val spillSize = metricTerm(ctx, "spillSize") val avgHashProbe = metricTerm(ctx, "avgHashProbe") - val doAggFuncName = ctx.addNewFunction(doAgg, - s""" - private void $doAgg() throws java.io.IOException { - $hashMapTerm = $thisPlan.createHashMap(); - ${child.asInstanceOf[CodegenSupport].produce(ctx, this)} - ${if (isFastHashMapEnabled) { - s"$iterTermForFastHashMap = $fastHashMapTerm.rowIterator();"} else ""} + val finishRegularHashMap = s"$iterTerm = $thisPlan.finishAggregate(" + + s"$hashMapTerm, $sorterTerm, $peakMemory, $spillSize, $avgHashProbe);" + val finishHashMap = if (isFastHashMapEnabled) { + s""" + |$iterTermForFastHashMap = $fastHashMapTerm.rowIterator(); + |$finishRegularHashMap + """.stripMargin + } else { + finishRegularHashMap + } - $iterTerm = $thisPlan.finishAggregate($hashMapTerm, $sorterTerm, $peakMemory, $spillSize, - $avgHashProbe); - } - """) + val doAggFuncName = ctx.addNewFunction(doAgg, + s""" + |private void $doAgg() throws java.io.IOException { + | ${child.asInstanceOf[CodegenSupport].produce(ctx, this)} + | $finishHashMap + |} + """.stripMargin) // generate code for output val keyTerm = ctx.freshName("aggKey") val bufferTerm = ctx.freshName("aggBuffer") val outputFunc = generateResultFunction(ctx) - val numOutput = metricTerm(ctx, "numOutputRows") - def outputFromGeneratedMap: String = { + def outputFromFastHashMap: String = { if (isFastHashMapEnabled) { if (isVectorizedHashMapEnabled) { outputFromVectorizedMap @@ -671,48 +660,55 @@ case class HashAggregateExec( def outputFromRowBasedMap: String = { s""" - while ($iterTermForFastHashMap.next()) { - $numOutput.add(1); - UnsafeRow $keyTerm = (UnsafeRow) $iterTermForFastHashMap.getKey(); - UnsafeRow $bufferTerm = (UnsafeRow) $iterTermForFastHashMap.getValue(); - $outputFunc($keyTerm, $bufferTerm); - - if (shouldStop()) return; - } - $fastHashMapTerm.close(); - """ + |while ($iterTermForFastHashMap.next()) { + | UnsafeRow $keyTerm = (UnsafeRow) $iterTermForFastHashMap.getKey(); + | UnsafeRow $bufferTerm = (UnsafeRow) $iterTermForFastHashMap.getValue(); + | $outputFunc($keyTerm, $bufferTerm); + | + | if (shouldStop()) return; + |} + 
|$fastHashMapTerm.close(); + """.stripMargin } - // Iterate over the aggregate rows and convert them from ColumnarRow to UnsafeRow + // Iterate over the aggregate rows and convert them from InternalRow to UnsafeRow def outputFromVectorizedMap: String = { - val row = ctx.freshName("fastHashMapRow") - ctx.currentVars = null - ctx.INPUT_ROW = row - val generateKeyRow = GenerateUnsafeProjection.createCode(ctx, - groupingKeySchema.toAttributes.zipWithIndex + val row = ctx.freshName("fastHashMapRow") + ctx.currentVars = null + ctx.INPUT_ROW = row + val generateKeyRow = GenerateUnsafeProjection.createCode(ctx, + groupingKeySchema.toAttributes.zipWithIndex .map { case (attr, i) => BoundReference(i, attr.dataType, attr.nullable) } - ) - val generateBufferRow = GenerateUnsafeProjection.createCode(ctx, - bufferSchema.toAttributes.zipWithIndex - .map { case (attr, i) => - BoundReference(groupingKeySchema.length + i, attr.dataType, attr.nullable) }) - s""" - | while ($iterTermForFastHashMap.hasNext()) { - | $numOutput.add(1); - | org.apache.spark.sql.execution.vectorized.ColumnarRow $row = - | (org.apache.spark.sql.execution.vectorized.ColumnarRow) - | $iterTermForFastHashMap.next(); - | ${generateKeyRow.code} - | ${generateBufferRow.code} - | $outputFunc(${generateKeyRow.value}, ${generateBufferRow.value}); - | - | if (shouldStop()) return; - | } - | - | $fastHashMapTerm.close(); - """.stripMargin + ) + val generateBufferRow = GenerateUnsafeProjection.createCode(ctx, + bufferSchema.toAttributes.zipWithIndex.map { case (attr, i) => + BoundReference(groupingKeySchema.length + i, attr.dataType, attr.nullable) + }) + s""" + |while ($iterTermForFastHashMap.hasNext()) { + | InternalRow $row = (InternalRow) $iterTermForFastHashMap.next(); + | ${generateKeyRow.code} + | ${generateBufferRow.code} + | $outputFunc(${generateKeyRow.value}, ${generateBufferRow.value}); + | + | if (shouldStop()) return; + |} + | + |$fastHashMapTerm.close(); + """.stripMargin } + def outputFromRegularHashMap: String = { + s""" + |while ($iterTerm.next()) { + | UnsafeRow $keyTerm = (UnsafeRow) $iterTerm.getKey(); + | UnsafeRow $bufferTerm = (UnsafeRow) $iterTerm.getValue(); + | $outputFunc($keyTerm, $bufferTerm); + | + | if (shouldStop()) return; + |} + """.stripMargin + } val aggTime = metricTerm(ctx, "aggTime") val beforeAgg = ctx.freshName("beforeAgg") @@ -725,16 +721,8 @@ case class HashAggregateExec( } // output the result - ${outputFromGeneratedMap} - - while ($iterTerm.next()) { - $numOutput.add(1); - UnsafeRow $keyTerm = (UnsafeRow) $iterTerm.getKey(); - UnsafeRow $bufferTerm = (UnsafeRow) $iterTerm.getValue(); - $outputFunc($keyTerm, $bufferTerm); - - if (shouldStop()) return; - } + $outputFromFastHashMap + $outputFromRegularHashMap $iterTerm.close(); if ($sorterTerm == null) { @@ -744,13 +732,11 @@ case class HashAggregateExec( } private def doConsumeWithKeys(ctx: CodegenContext, input: Seq[ExprCode]): String = { - // create grouping key - ctx.currentVars = input val unsafeRowKeyCode = GenerateUnsafeProjection.createCode( ctx, groupingExpressions.map(e => BindReferences.bindReference[Expression](e, child.output))) val fastRowKeys = ctx.generateExpressions( - groupingExpressions.map(e => BindReferences.bindReference[Expression](e, child.output))) + groupingExpressions.map(e => BindReferences.bindReference[Expression](e, child.output))) val unsafeRowKeys = unsafeRowKeyCode.value val unsafeRowBuffer = ctx.freshName("unsafeRowAggBuffer") val fastRowBuffer = ctx.freshName("fastAggBuffer") @@ -767,102 +753,76 @@ case class 
HashAggregateExec( // generate hash code for key val hashExpr = Murmur3Hash(groupingExpressions, 42) - ctx.currentVars = input val hashEval = BindReferences.bindReference(hashExpr, child.output).genCode(ctx) - val inputAttr = aggregateBufferAttributes ++ child.output - ctx.currentVars = new Array[ExprCode](aggregateBufferAttributes.length) ++ input - val (checkFallbackForGeneratedHashMap, checkFallbackForBytesToBytesMap, resetCounter, incCounter) = if (testFallbackStartsAt.isDefined) { - val countTerm = ctx.freshName("fallbackCounter") - ctx.addMutableState(ctx.JAVA_INT, countTerm, s"$countTerm = 0;") + val countTerm = ctx.addMutableState(ctx.JAVA_INT, "fallbackCounter") (s"$countTerm < ${testFallbackStartsAt.get._1}", s"$countTerm < ${testFallbackStartsAt.get._2}", s"$countTerm = 0;", s"$countTerm += 1;") } else { ("true", "true", "", "") } - // We first generate code to probe and update the fast hash map. If the probe is - // successful the corresponding fast row buffer will hold the mutable row - val findOrInsertFastHashMap: Option[String] = { + val findOrInsertRegularHashMap: String = + s""" + |// generate grouping key + |${unsafeRowKeyCode.code.trim} + |${hashEval.code.trim} + |if ($checkFallbackForBytesToBytesMap) { + | // try to get the buffer from hash map + | $unsafeRowBuffer = + | $hashMapTerm.getAggregationBufferFromUnsafeRow($unsafeRowKeys, ${hashEval.value}); + |} + |// Can't allocate buffer from the hash map. Spill the map and fallback to sort-based + |// aggregation after processing all input rows. + |if ($unsafeRowBuffer == null) { + | if ($sorterTerm == null) { + | $sorterTerm = $hashMapTerm.destructAndCreateExternalSorter(); + | } else { + | $sorterTerm.merge($hashMapTerm.destructAndCreateExternalSorter()); + | } + | $resetCounter + | // the hash map had been spilled, it should have enough memory now, + | // try to allocate buffer again. + | $unsafeRowBuffer = $hashMapTerm.getAggregationBufferFromUnsafeRow( + | $unsafeRowKeys, ${hashEval.value}); + | if ($unsafeRowBuffer == null) { + | // failed to allocate the first page + | throw new OutOfMemoryError("No enough memory for aggregation"); + | } + |} + """.stripMargin + + val findOrInsertHashMap: String = { if (isFastHashMapEnabled) { - Option( - s""" - | - |if ($checkFallbackForGeneratedHashMap) { - | ${fastRowKeys.map(_.code).mkString("\n")} - | if (${fastRowKeys.map("!" + _.isNull).mkString(" && ")}) { - | $fastRowBuffer = $fastHashMapTerm.findOrInsert( - | ${fastRowKeys.map(_.value).mkString(", ")}); - | } - |} - """.stripMargin) + // If fast hash map is on, we first generate code to probe and update the fast hash map. + // If the probe is successful, the corresponding fast row buffer will hold the mutable row. + s""" + |if ($checkFallbackForGeneratedHashMap) { + | ${fastRowKeys.map(_.code).mkString("\n")} + | if (${fastRowKeys.map("!" + _.isNull).mkString(" && ")}) { + | $fastRowBuffer = $fastHashMapTerm.findOrInsert( + | ${fastRowKeys.map(_.value).mkString(", ")}); + | } + |} + |// Cannot find the key in fast hash map, try regular hash map. + |if ($fastRowBuffer == null) { + | $findOrInsertRegularHashMap + |} + """.stripMargin } else { - None + findOrInsertRegularHashMap } } + val inputAttr = aggregateBufferAttributes ++ child.output + // Here we set `currentVars(0)` through `currentVars(numBufferSlots)` to null, so that when + // generating code for buffer columns, we use `INPUT_ROW` (which will be the buffer row), while + // generating input columns, we use `currentVars`.
+ ctx.currentVars = new Array[ExprCode](aggregateBufferAttributes.length) ++ input - def updateRowInFastHashMap(isVectorized: Boolean): Option[String] = { - ctx.INPUT_ROW = fastRowBuffer - val boundUpdateExpr = updateExpr.map(BindReferences.bindReference(_, inputAttr)) - val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExpr) - val effectiveCodes = subExprs.codes.mkString("\n") - val fastRowEvals = ctx.withSubExprEliminationExprs(subExprs.states) { - boundUpdateExpr.map(_.genCode(ctx)) - } - val updateFastRow = fastRowEvals.zipWithIndex.map { case (ev, i) => - val dt = updateExpr(i).dataType - ctx.updateColumn(fastRowBuffer, dt, i, ev, updateExpr(i).nullable, isVectorized) - } - Option( - s""" - |// common sub-expressions - |$effectiveCodes - |// evaluate aggregate function - |${evaluateVariables(fastRowEvals)} - |// update fast row - |${updateFastRow.mkString("\n").trim} - | - """.stripMargin) - } - - // Next, we generate code to probe and update the unsafe row hash map. - val findOrInsertInUnsafeRowMap: String = { - s""" - | if ($fastRowBuffer == null) { - | // generate grouping key - | ${unsafeRowKeyCode.code.trim} - | ${hashEval.code.trim} - | if ($checkFallbackForBytesToBytesMap) { - | // try to get the buffer from hash map - | $unsafeRowBuffer = - | $hashMapTerm.getAggregationBufferFromUnsafeRow($unsafeRowKeys, ${hashEval.value}); - | } - | // Can't allocate buffer from the hash map. Spill the map and fallback to sort-based - | // aggregation after processing all input rows. - | if ($unsafeRowBuffer == null) { - | if ($sorterTerm == null) { - | $sorterTerm = $hashMapTerm.destructAndCreateExternalSorter(); - | } else { - | $sorterTerm.merge($hashMapTerm.destructAndCreateExternalSorter()); - | } - | $resetCounter - | // the hash map had be spilled, it should have enough memory now, - | // try to allocate buffer again. - | $unsafeRowBuffer = - | $hashMapTerm.getAggregationBufferFromUnsafeRow($unsafeRowKeys, ${hashEval.value}); - | if ($unsafeRowBuffer == null) { - | // failed to allocate the first page - | throw new OutOfMemoryError("No enough memory for aggregation"); - | } - | } - | } - """.stripMargin - } - - val updateRowInUnsafeRowMap: String = { + val updateRowInRegularHashMap: String = { ctx.INPUT_ROW = unsafeRowBuffer val boundUpdateExpr = updateExpr.map(BindReferences.bindReference(_, inputAttr)) val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExpr) @@ -881,45 +841,69 @@ case class HashAggregateExec( |${evaluateVariables(unsafeRowBufferEvals)} |// update unsafe row buffer |${updateUnsafeRowBuffer.mkString("\n").trim} - """.stripMargin + """.stripMargin } + val updateRowInHashMap: String = { + if (isFastHashMapEnabled) { + ctx.INPUT_ROW = fastRowBuffer + val boundUpdateExpr = updateExpr.map(BindReferences.bindReference(_, inputAttr)) + val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExpr) + val effectiveCodes = subExprs.codes.mkString("\n") + val fastRowEvals = ctx.withSubExprEliminationExprs(subExprs.states) { + boundUpdateExpr.map(_.genCode(ctx)) + } + val updateFastRow = fastRowEvals.zipWithIndex.map { case (ev, i) => + val dt = updateExpr(i).dataType + ctx.updateColumn( + fastRowBuffer, dt, i, ev, updateExpr(i).nullable, isVectorizedHashMapEnabled) + } + + // If fast hash map is on, we first generate code to update the row in the fast hash map, if + // the previous lookup hit the fast hash map. Otherwise, update the row in the regular hash map.
+ s""" + |if ($fastRowBuffer != null) { + | // common sub-expressions + | $effectiveCodes + | // evaluate aggregate function + | ${evaluateVariables(fastRowEvals)} + | // update fast row + | ${updateFastRow.mkString("\n").trim} + |} else { + | $updateRowInRegularHashMap + |} + """.stripMargin + } else { + updateRowInRegularHashMap + } + } + + val declareRowBuffer: String = if (isFastHashMapEnabled) { + val fastRowType = if (isVectorizedHashMapEnabled) { + classOf[MutableColumnarRow].getName + } else { + "UnsafeRow" + } + s""" + |UnsafeRow $unsafeRowBuffer = null; + |$fastRowType $fastRowBuffer = null; + """.stripMargin + } else { + s"UnsafeRow $unsafeRowBuffer = null;" + } // We try to do hash map based in-memory aggregation first. If there is not enough memory (the // hash map will return null for new key), we spill the hash map to disk to free memory, then // continue to do in-memory aggregation and spilling until all the rows had been processed. // Finally, sort the spilled aggregate buffers by key, and merge them together for same key. s""" - UnsafeRow $unsafeRowBuffer = null; - ${ - if (isVectorizedHashMapEnabled) { - s""" - | org.apache.spark.sql.execution.vectorized.ColumnarRow $fastRowBuffer = null; - """.stripMargin - } else { - s""" - | UnsafeRow $fastRowBuffer = null; - """.stripMargin - } - } - - ${findOrInsertFastHashMap.getOrElse("")} + $declareRowBuffer - $findOrInsertInUnsafeRowMap + $findOrInsertHashMap $incCounter - if ($fastRowBuffer != null) { - // update fast row - ${ - if (isFastHashMapEnabled) { - updateRowInFastHashMap(isVectorizedHashMapEnabled).getOrElse("") - } else "" - } - } else { - // update unsafe row - $updateRowInUnsafeRowMap - } + $updateRowInHashMap """ } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashMapGenerator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashMapGenerator.scala index 85b4529501ea8..1c613b19c4ab1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashMapGenerator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashMapGenerator.scala @@ -46,10 +46,8 @@ abstract class HashMapGenerator( val functions = aggregateExpressions.map(_.aggregateFunction.asInstanceOf[DeclarativeAggregate]) val initExpr = functions.flatMap(f => f.initialValues) initExpr.map { e => - val isNull = ctx.freshName("bufIsNull") - val value = ctx.freshName("bufValue") - ctx.addMutableState(ctx.JAVA_BOOLEAN, isNull) - ctx.addMutableState(ctx.javaType(e.dataType), value) + val isNull = ctx.addMutableState(ctx.JAVA_BOOLEAN, "bufIsNull") + val value = ctx.addMutableState(ctx.javaType(e.dataType), "bufValue") val ev = e.genCode(ctx) val initVars = s""" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/RowBasedHashMapGenerator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/RowBasedHashMapGenerator.scala index 3718424931b40..fd25707dd4ca6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/RowBasedHashMapGenerator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/RowBasedHashMapGenerator.scala @@ -47,7 +47,7 @@ class RowBasedHashMapGenerator( val generatedKeySchema: String = s"new org.apache.spark.sql.types.StructType()" + groupingKeySchema.map { key => - val keyName = ctx.addReferenceMinorObj(key.name) + val keyName = ctx.addReferenceObj("keyName", key.name) key.dataType match { case d: DecimalType => s""".add($keyName, 
org.apache.spark.sql.types.DataTypes.createDecimalType(
@@ -60,7 +60,7 @@ class RowBasedHashMapGenerator(
     val generatedValueSchema: String =
       s"new org.apache.spark.sql.types.StructType()" +
         bufferSchema.map { key =>
-          val keyName = ctx.addReferenceMinorObj(key.name)
+          val keyName = ctx.addReferenceObj("keyName", key.name)
           key.dataType match {
             case d: DecimalType =>
               s""".add($keyName, org.apache.spark.sql.types.DataTypes.createDecimalType(
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala
index 756eeb642e2d0..9dc334c1ead3c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.aggregate
 
 import org.apache.spark.TaskContext
 import org.apache.spark.internal.Logging
+import org.apache.spark.memory.SparkOutOfMemoryError
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate._
@@ -205,7 +206,7 @@ class TungstenAggregationIterator(
         buffer = hashMap.getAggregationBufferFromUnsafeRow(groupingKey)
         if (buffer == null) {
           // failed to allocate the first page
-          throw new OutOfMemoryError("No enough memory for aggregation")
+          throw new SparkOutOfMemoryError("Not enough memory for aggregation")
         }
       }
       processRow(buffer, newInput)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/VectorizedHashMapGenerator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/VectorizedHashMapGenerator.scala
index fd783d905b776..0cf9b53ce1d5d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/VectorizedHashMapGenerator.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/VectorizedHashMapGenerator.scala
@@ -17,9 +17,12 @@
 
 package org.apache.spark.sql.execution.aggregate
 
+import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
 import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext
+import org.apache.spark.sql.execution.vectorized.{MutableColumnarRow, OnHeapColumnVector}
 import org.apache.spark.sql.types._
+import org.apache.spark.sql.vectorized.ColumnarBatch
 
 /**
  * This is a helper class to generate an append-only vectorized hash map that can act as a 'cache'
@@ -52,7 +55,7 @@ class VectorizedHashMapGenerator(
     val generatedSchema: String =
       s"new org.apache.spark.sql.types.StructType()" +
         (groupingKeySchema ++ bufferSchema).map { key =>
-          val keyName = ctx.addReferenceMinorObj(key.name)
+          val keyName = ctx.addReferenceObj("keyName", key.name)
           key.dataType match {
             case d: DecimalType =>
               s""".add($keyName, org.apache.spark.sql.types.DataTypes.createDecimalType(
@@ -65,7 +68,7 @@ class VectorizedHashMapGenerator(
     val generatedAggBufferSchema: String =
       s"new org.apache.spark.sql.types.StructType()" +
         bufferSchema.map { key =>
-          val keyName = ctx.addReferenceMinorObj(key.name)
+          val keyName = ctx.addReferenceObj("keyName", key.name)
           key.dataType match {
             case d: DecimalType =>
               s""".add($keyName, org.apache.spark.sql.types.DataTypes.createDecimalType(
@@ -76,10 +79,9 @@ class VectorizedHashMapGenerator(
         }.mkString("\n").concat(";")
 
     s"""
-       |  private 
org.apache.spark.sql.execution.vectorized.OnHeapColumnVector[] batchVectors; - | private org.apache.spark.sql.execution.vectorized.OnHeapColumnVector[] bufferVectors; - | private org.apache.spark.sql.execution.vectorized.ColumnarBatch batch; - | private org.apache.spark.sql.execution.vectorized.ColumnarBatch aggregateBufferBatch; + | private ${classOf[OnHeapColumnVector].getName}[] vectors; + | private ${classOf[ColumnarBatch].getName} batch; + | private ${classOf[MutableColumnarRow].getName} aggBufferRow; | private int[] buckets; | private int capacity = 1 << 16; | private double loadFactor = 0.5; @@ -91,19 +93,16 @@ class VectorizedHashMapGenerator( | $generatedAggBufferSchema | | public $generatedClassName() { - | batchVectors = org.apache.spark.sql.execution.vectorized - | .OnHeapColumnVector.allocateColumns(capacity, schema); - | batch = new org.apache.spark.sql.execution.vectorized.ColumnarBatch( - | schema, batchVectors, capacity); + | vectors = ${classOf[OnHeapColumnVector].getName}.allocateColumns(capacity, schema); + | batch = new ${classOf[ColumnarBatch].getName}(schema, vectors, capacity); | - | bufferVectors = new org.apache.spark.sql.execution.vectorized - | .OnHeapColumnVector[aggregateBufferSchema.fields().length]; + | // Generates a projection to return the aggregate buffer only. + | ${classOf[OnHeapColumnVector].getName}[] aggBufferVectors = + | new ${classOf[OnHeapColumnVector].getName}[aggregateBufferSchema.fields().length]; | for (int i = 0; i < aggregateBufferSchema.fields().length; i++) { - | bufferVectors[i] = batchVectors[i + ${groupingKeys.length}]; + | aggBufferVectors[i] = vectors[i + ${groupingKeys.length}]; | } - | // TODO: Possibly generate this projection in HashAggregate directly - | aggregateBufferBatch = new org.apache.spark.sql.execution.vectorized.ColumnarBatch( - | aggregateBufferSchema, bufferVectors, capacity); + | aggBufferRow = new ${classOf[MutableColumnarRow].getName}(aggBufferVectors); | | buckets = new int[numBuckets]; | java.util.Arrays.fill(buckets, -1); @@ -114,13 +113,13 @@ class VectorizedHashMapGenerator( /** * Generates a method that returns true if the group-by keys exist at a given index in the - * associated [[org.apache.spark.sql.execution.vectorized.ColumnarBatch]]. For instance, if we - * have 2 long group-by keys, the generated function would be of the form: + * associated [[org.apache.spark.sql.execution.vectorized.OnHeapColumnVector]]. 
For instance,
+   * if we have 2 long group-by keys, the generated function would be of the form:
    *
    * {{{
    *   private boolean equals(int idx, long agg_key, long agg_key1) {
-   *     return batchVectors[0].getLong(buckets[idx]) == agg_key &&
-   *       batchVectors[1].getLong(buckets[idx]) == agg_key1;
+   *     return vectors[0].getLong(buckets[idx]) == agg_key &&
+   *       vectors[1].getLong(buckets[idx]) == agg_key1;
    *   }
    * }}}
    */
@@ -128,7 +127,7 @@
 
     def genEqualsForKeys(groupingKeys: Seq[Buffer]): String = {
       groupingKeys.zipWithIndex.map { case (key: Buffer, ordinal: Int) =>
-        s"""(${ctx.genEqual(key.dataType, ctx.getValue(s"batchVectors[$ordinal]", "buckets[idx]",
+        s"""(${ctx.genEqual(key.dataType, ctx.getValue(s"vectors[$ordinal]", "buckets[idx]",
           key.dataType), key.name)})"""
       }.mkString(" && ")
     }
@@ -141,29 +140,35 @@
   }
 
   /**
-   * Generates a method that returns a mutable
-   * [[org.apache.spark.sql.execution.vectorized.ColumnarRow]] which keeps track of the
+   * Generates a method that returns a
+   * [[org.apache.spark.sql.execution.vectorized.MutableColumnarRow]] which keeps track of the
    * aggregate value(s) for a given set of keys. If the corresponding row doesn't exist, the
    * generated method adds the corresponding row in the associated
-   * [[org.apache.spark.sql.execution.vectorized.ColumnarBatch]]. For instance, if we
+   * [[org.apache.spark.sql.execution.vectorized.OnHeapColumnVector]]. For instance, if we
    * have 2 long group-by keys, the generated function would be of the form:
    *
    * {{{
-   *   public org.apache.spark.sql.execution.vectorized.ColumnarRow findOrInsert(
-   *       long agg_key, long agg_key1) {
+   *   public MutableColumnarRow findOrInsert(long agg_key, long agg_key1) {
    *     long h = hash(agg_key, agg_key1);
    *     int step = 0;
    *     int idx = (int) h & (numBuckets - 1);
    *     while (step < maxSteps) {
    *       // Return bucket index if it's either an empty slot or already contains the key
    *       if (buckets[idx] == -1) {
-   *         batchVectors[0].putLong(numRows, agg_key);
-   *         batchVectors[1].putLong(numRows, agg_key1);
-   *         batchVectors[2].putLong(numRows, 0);
-   *         buckets[idx] = numRows++;
-   *         return batch.getRow(buckets[idx]);
+   *         if (numRows < capacity) {
+   *           vectors[0].putLong(numRows, agg_key);
+   *           vectors[1].putLong(numRows, agg_key1);
+   *           vectors[2].putLong(numRows, 0);
+   *           buckets[idx] = numRows++;
+   *           aggBufferRow.rowId = buckets[idx];
+   *           return aggBufferRow;
+   *         } else {
+   *           // No more space
+   *           return null;
+   *         }
    *       } else if (equals(idx, agg_key, agg_key1)) {
-   *         return batch.getRow(buckets[idx]);
+   *         aggBufferRow.rowId = buckets[idx];
+   *         return aggBufferRow;
    *       }
    *       idx = (idx + 1) & (numBuckets - 1);
    *       step++;
@@ -177,20 +182,19 @@
 
     def genCodeToSetKeys(groupingKeys: Seq[Buffer]): Seq[String] = {
       groupingKeys.zipWithIndex.map { case (key: Buffer, ordinal: Int) =>
-        ctx.setValue(s"batchVectors[$ordinal]", "numRows", key.dataType, key.name)
+        ctx.setValue(s"vectors[$ordinal]", "numRows", key.dataType, key.name)
       }
     }
 
     def genCodeToSetAggBuffers(bufferValues: Seq[Buffer]): Seq[String] = {
       bufferValues.zipWithIndex.map { case (key: Buffer, ordinal: Int) =>
-        ctx.updateColumn(s"batchVectors[${groupingKeys.length + ordinal}]", "numRows", key.dataType,
+        ctx.updateColumn(s"vectors[${groupingKeys.length + ordinal}]", "numRows", key.dataType,
           buffVars(ordinal), nullable = true)
       }
     }
 
     s"""
-       |public org.apache.spark.sql.execution.vectorized.ColumnarRow findOrInsert(${
-    groupingKeySignature}) {
+       |public ${classOf[MutableColumnarRow].getName} 
findOrInsert($groupingKeySignature) { | long h = hash(${groupingKeys.map(_.name).mkString(", ")}); | int step = 0; | int idx = (int) h & (numBuckets - 1); @@ -208,15 +212,15 @@ class VectorizedHashMapGenerator( | ${genCodeToSetAggBuffers(bufferValues).mkString("\n")} | | buckets[idx] = numRows++; - | batch.setNumRows(numRows); - | aggregateBufferBatch.setNumRows(numRows); - | return aggregateBufferBatch.getRow(buckets[idx]); + | aggBufferRow.rowId = buckets[idx]; + | return aggBufferRow; | } else { | // No more space | return null; | } | } else if (equals(idx, ${groupingKeys.map(_.name).mkString(", ")})) { - | return aggregateBufferBatch.getRow(buckets[idx]); + | aggBufferRow.rowId = buckets[idx]; + | return aggBufferRow; | } | idx = (idx + 1) & (numBuckets - 1); | step++; @@ -229,8 +233,8 @@ class VectorizedHashMapGenerator( protected def generateRowIterator(): String = { s""" - |public java.util.Iterator - | rowIterator() { + |public java.util.Iterator<${classOf[InternalRow].getName}> rowIterator() { + | batch.setNumRows(numRows); | return batch.rowIterator(); |} """.stripMargin diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala index 3cafb344ef553..bcd1aa0890ba3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala @@ -24,16 +24,16 @@ import scala.collection.JavaConverters._ import org.apache.arrow.memory.BufferAllocator import org.apache.arrow.vector._ -import org.apache.arrow.vector.file._ -import org.apache.arrow.vector.schema.ArrowRecordBatch +import org.apache.arrow.vector.ipc.{ArrowFileReader, ArrowFileWriter} +import org.apache.arrow.vector.ipc.message.ArrowRecordBatch import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel import org.apache.spark.TaskContext import org.apache.spark.api.java.JavaRDD import org.apache.spark.sql.{DataFrame, SQLContext} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.execution.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector} import org.apache.spark.sql.types._ +import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector} import org.apache.spark.util.Utils @@ -86,13 +86,9 @@ private[sql] object ArrowConverters { val root = VectorSchemaRoot.create(arrowSchema, allocator) val arrowWriter = ArrowWriter.create(root) - var closed = false - context.addTaskCompletionListener { _ => - if (!closed) { - root.close() - allocator.close() - } + root.close() + allocator.close() } new Iterator[ArrowPayload] { @@ -100,7 +96,6 @@ private[sql] object ArrowConverters { override def hasNext: Boolean = rowIter.hasNext || { root.close() allocator.close() - closed = true false } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala index e4af4f65da127..22b63513548fe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala @@ -46,17 +46,19 @@ object ArrowWriter { private def createFieldWriter(vector: ValueVector): ArrowFieldWriter = { val field = vector.getField() (ArrowUtils.fromArrowField(field), vector) match { - case (BooleanType, vector: NullableBitVector) => new BooleanWriter(vector) - case 
(ByteType, vector: NullableTinyIntVector) => new ByteWriter(vector) - case (ShortType, vector: NullableSmallIntVector) => new ShortWriter(vector) - case (IntegerType, vector: NullableIntVector) => new IntegerWriter(vector) - case (LongType, vector: NullableBigIntVector) => new LongWriter(vector) - case (FloatType, vector: NullableFloat4Vector) => new FloatWriter(vector) - case (DoubleType, vector: NullableFloat8Vector) => new DoubleWriter(vector) - case (StringType, vector: NullableVarCharVector) => new StringWriter(vector) - case (BinaryType, vector: NullableVarBinaryVector) => new BinaryWriter(vector) - case (DateType, vector: NullableDateDayVector) => new DateWriter(vector) - case (TimestampType, vector: NullableTimeStampMicroTZVector) => new TimestampWriter(vector) + case (BooleanType, vector: BitVector) => new BooleanWriter(vector) + case (ByteType, vector: TinyIntVector) => new ByteWriter(vector) + case (ShortType, vector: SmallIntVector) => new ShortWriter(vector) + case (IntegerType, vector: IntVector) => new IntegerWriter(vector) + case (LongType, vector: BigIntVector) => new LongWriter(vector) + case (FloatType, vector: Float4Vector) => new FloatWriter(vector) + case (DoubleType, vector: Float8Vector) => new DoubleWriter(vector) + case (DecimalType.Fixed(precision, scale), vector: DecimalVector) => + new DecimalWriter(vector, precision, scale) + case (StringType, vector: VarCharVector) => new StringWriter(vector) + case (BinaryType, vector: VarBinaryVector) => new BinaryWriter(vector) + case (DateType, vector: DateDayVector) => new DateWriter(vector) + case (TimestampType, vector: TimeStampMicroTZVector) => new TimestampWriter(vector) case (ArrayType(_, _), vector: ListVector) => val elementVector = createFieldWriter(vector.getDataVector()) new ArrayWriter(vector, elementVector) @@ -103,7 +105,6 @@ class ArrowWriter(val root: VectorSchemaRoot, fields: Array[ArrowFieldWriter]) { private[arrow] abstract class ArrowFieldWriter { def valueVector: ValueVector - def valueMutator: ValueVector.Mutator def name: String = valueVector.getField().getName() def dataType: DataType = ArrowUtils.fromArrowField(valueVector.getField()) @@ -124,161 +125,163 @@ private[arrow] abstract class ArrowFieldWriter { } def finish(): Unit = { - valueMutator.setValueCount(count) + valueVector.setValueCount(count) } def reset(): Unit = { - valueMutator.reset() + // TODO: reset() should be in a common interface + valueVector match { + case fixedWidthVector: BaseFixedWidthVector => fixedWidthVector.reset() + case variableWidthVector: BaseVariableWidthVector => variableWidthVector.reset() + case _ => + } count = 0 } } -private[arrow] class BooleanWriter(val valueVector: NullableBitVector) extends ArrowFieldWriter { - - override def valueMutator: NullableBitVector#Mutator = valueVector.getMutator() +private[arrow] class BooleanWriter(val valueVector: BitVector) extends ArrowFieldWriter { override def setNull(): Unit = { - valueMutator.setNull(count) + valueVector.setNull(count) } override def setValue(input: SpecializedGetters, ordinal: Int): Unit = { - valueMutator.setSafe(count, if (input.getBoolean(ordinal)) 1 else 0) + valueVector.setSafe(count, if (input.getBoolean(ordinal)) 1 else 0) } } -private[arrow] class ByteWriter(val valueVector: NullableTinyIntVector) extends ArrowFieldWriter { - - override def valueMutator: NullableTinyIntVector#Mutator = valueVector.getMutator() +private[arrow] class ByteWriter(val valueVector: TinyIntVector) extends ArrowFieldWriter { override def setNull(): Unit = { - 
valueMutator.setNull(count) + valueVector.setNull(count) } override def setValue(input: SpecializedGetters, ordinal: Int): Unit = { - valueMutator.setSafe(count, input.getByte(ordinal)) + valueVector.setSafe(count, input.getByte(ordinal)) } } -private[arrow] class ShortWriter(val valueVector: NullableSmallIntVector) extends ArrowFieldWriter { - - override def valueMutator: NullableSmallIntVector#Mutator = valueVector.getMutator() +private[arrow] class ShortWriter(val valueVector: SmallIntVector) extends ArrowFieldWriter { override def setNull(): Unit = { - valueMutator.setNull(count) + valueVector.setNull(count) } override def setValue(input: SpecializedGetters, ordinal: Int): Unit = { - valueMutator.setSafe(count, input.getShort(ordinal)) + valueVector.setSafe(count, input.getShort(ordinal)) } } -private[arrow] class IntegerWriter(val valueVector: NullableIntVector) extends ArrowFieldWriter { - - override def valueMutator: NullableIntVector#Mutator = valueVector.getMutator() +private[arrow] class IntegerWriter(val valueVector: IntVector) extends ArrowFieldWriter { override def setNull(): Unit = { - valueMutator.setNull(count) + valueVector.setNull(count) } override def setValue(input: SpecializedGetters, ordinal: Int): Unit = { - valueMutator.setSafe(count, input.getInt(ordinal)) + valueVector.setSafe(count, input.getInt(ordinal)) } } -private[arrow] class LongWriter(val valueVector: NullableBigIntVector) extends ArrowFieldWriter { - - override def valueMutator: NullableBigIntVector#Mutator = valueVector.getMutator() +private[arrow] class LongWriter(val valueVector: BigIntVector) extends ArrowFieldWriter { override def setNull(): Unit = { - valueMutator.setNull(count) + valueVector.setNull(count) } override def setValue(input: SpecializedGetters, ordinal: Int): Unit = { - valueMutator.setSafe(count, input.getLong(ordinal)) + valueVector.setSafe(count, input.getLong(ordinal)) } } -private[arrow] class FloatWriter(val valueVector: NullableFloat4Vector) extends ArrowFieldWriter { - - override def valueMutator: NullableFloat4Vector#Mutator = valueVector.getMutator() +private[arrow] class FloatWriter(val valueVector: Float4Vector) extends ArrowFieldWriter { override def setNull(): Unit = { - valueMutator.setNull(count) + valueVector.setNull(count) } override def setValue(input: SpecializedGetters, ordinal: Int): Unit = { - valueMutator.setSafe(count, input.getFloat(ordinal)) + valueVector.setSafe(count, input.getFloat(ordinal)) } } -private[arrow] class DoubleWriter(val valueVector: NullableFloat8Vector) extends ArrowFieldWriter { - - override def valueMutator: NullableFloat8Vector#Mutator = valueVector.getMutator() +private[arrow] class DoubleWriter(val valueVector: Float8Vector) extends ArrowFieldWriter { override def setNull(): Unit = { - valueMutator.setNull(count) + valueVector.setNull(count) } override def setValue(input: SpecializedGetters, ordinal: Int): Unit = { - valueMutator.setSafe(count, input.getDouble(ordinal)) + valueVector.setSafe(count, input.getDouble(ordinal)) } } -private[arrow] class StringWriter(val valueVector: NullableVarCharVector) extends ArrowFieldWriter { +private[arrow] class DecimalWriter( + val valueVector: DecimalVector, + precision: Int, + scale: Int) extends ArrowFieldWriter { - override def valueMutator: NullableVarCharVector#Mutator = valueVector.getMutator() + override def setNull(): Unit = { + valueVector.setNull(count) + } + + override def setValue(input: SpecializedGetters, ordinal: Int): Unit = { + val decimal = input.getDecimal(ordinal, precision, 
scale) + if (decimal.changePrecision(precision, scale)) { + valueVector.setSafe(count, decimal.toJavaBigDecimal) + } else { + setNull() + } + } +} + +private[arrow] class StringWriter(val valueVector: VarCharVector) extends ArrowFieldWriter { override def setNull(): Unit = { - valueMutator.setNull(count) + valueVector.setNull(count) } override def setValue(input: SpecializedGetters, ordinal: Int): Unit = { val utf8 = input.getUTF8String(ordinal) val utf8ByteBuffer = utf8.getByteBuffer // todo: for off-heap UTF8String, how to pass in to arrow without copy? - valueMutator.setSafe(count, utf8ByteBuffer, utf8ByteBuffer.position(), utf8.numBytes()) + valueVector.setSafe(count, utf8ByteBuffer, utf8ByteBuffer.position(), utf8.numBytes()) } } private[arrow] class BinaryWriter( - val valueVector: NullableVarBinaryVector) extends ArrowFieldWriter { - - override def valueMutator: NullableVarBinaryVector#Mutator = valueVector.getMutator() + val valueVector: VarBinaryVector) extends ArrowFieldWriter { override def setNull(): Unit = { - valueMutator.setNull(count) + valueVector.setNull(count) } override def setValue(input: SpecializedGetters, ordinal: Int): Unit = { val bytes = input.getBinary(ordinal) - valueMutator.setSafe(count, bytes, 0, bytes.length) + valueVector.setSafe(count, bytes, 0, bytes.length) } } -private[arrow] class DateWriter(val valueVector: NullableDateDayVector) extends ArrowFieldWriter { - - override def valueMutator: NullableDateDayVector#Mutator = valueVector.getMutator() +private[arrow] class DateWriter(val valueVector: DateDayVector) extends ArrowFieldWriter { override def setNull(): Unit = { - valueMutator.setNull(count) + valueVector.setNull(count) } override def setValue(input: SpecializedGetters, ordinal: Int): Unit = { - valueMutator.setSafe(count, input.getInt(ordinal)) + valueVector.setSafe(count, input.getInt(ordinal)) } } private[arrow] class TimestampWriter( - val valueVector: NullableTimeStampMicroTZVector) extends ArrowFieldWriter { - - override def valueMutator: NullableTimeStampMicroTZVector#Mutator = valueVector.getMutator() + val valueVector: TimeStampMicroTZVector) extends ArrowFieldWriter { override def setNull(): Unit = { - valueMutator.setNull(count) + valueVector.setNull(count) } override def setValue(input: SpecializedGetters, ordinal: Int): Unit = { - valueMutator.setSafe(count, input.getLong(ordinal)) + valueVector.setSafe(count, input.getLong(ordinal)) } } @@ -286,20 +289,18 @@ private[arrow] class ArrayWriter( val valueVector: ListVector, val elementWriter: ArrowFieldWriter) extends ArrowFieldWriter { - override def valueMutator: ListVector#Mutator = valueVector.getMutator() - override def setNull(): Unit = { } override def setValue(input: SpecializedGetters, ordinal: Int): Unit = { val array = input.getArray(ordinal) var i = 0 - valueMutator.startNewValue(count) + valueVector.startNewValue(count) while (i < array.numElements()) { elementWriter.write(array, i) i += 1 } - valueMutator.endValue(count, array.numElements()) + valueVector.endValue(count, array.numElements()) } override def finish(): Unit = { @@ -317,8 +318,6 @@ private[arrow] class StructWriter( val valueVector: NullableMapVector, children: Array[ArrowFieldWriter]) extends ArrowFieldWriter { - override def valueMutator: NullableMapVector#Mutator = valueVector.getMutator() - override def setNull(): Unit = { var i = 0 while (i < children.length) { @@ -326,7 +325,7 @@ private[arrow] class StructWriter( children(i).count += 1 i += 1 } - valueMutator.setNull(count) + valueVector.setNull(count) } 
override def setValue(input: SpecializedGetters, ordinal: Int): Unit = { @@ -336,7 +335,7 @@ private[arrow] class StructWriter( children(i).write(struct, i) i += 1 } - valueMutator.setIndexDefined(count) + valueVector.setIndexDefined(count) } override def finish(): Unit = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index c9a15147e30d0..a15a8d11aa2a0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -279,29 +279,30 @@ case class SampleExec( override def doConsume(ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = { val numOutput = metricTerm(ctx, "numOutputRows") - val sampler = ctx.freshName("sampler") if (withReplacement) { val samplerClass = classOf[PoissonSampler[UnsafeRow]].getName val initSampler = ctx.freshName("initSampler") - val initSamplerFuncName = ctx.addNewFunction(initSampler, - s""" - | private void $initSampler() { - | $sampler = new $samplerClass($upperBound - $lowerBound, false); - | java.util.Random random = new java.util.Random(${seed}L); - | long randomSeed = random.nextLong(); - | int loopCount = 0; - | while (loopCount < partitionIndex) { - | randomSeed = random.nextLong(); - | loopCount += 1; - | } - | $sampler.setSeed(randomSeed); - | } - """.stripMargin.trim) - - ctx.addMutableState(s"$samplerClass", sampler, - s"$initSamplerFuncName();") + // Inline mutable state since not many Sample operations in a task + val sampler = ctx.addMutableState(s"$samplerClass", "sampleReplace", + v => { + val initSamplerFuncName = ctx.addNewFunction(initSampler, + s""" + | private void $initSampler() { + | $v = new $samplerClass($upperBound - $lowerBound, false); + | java.util.Random random = new java.util.Random(${seed}L); + | long randomSeed = random.nextLong(); + | int loopCount = 0; + | while (loopCount < partitionIndex) { + | randomSeed = random.nextLong(); + | loopCount += 1; + | } + | $v.setSeed(randomSeed); + | } + """.stripMargin.trim) + s"$initSamplerFuncName();" + }, forceInline = true) val samplingCount = ctx.freshName("samplingCount") s""" @@ -313,10 +314,10 @@ case class SampleExec( """.stripMargin.trim } else { val samplerClass = classOf[BernoulliCellSampler[UnsafeRow]].getName - ctx.addMutableState(s"$samplerClass", sampler, - s""" - | $sampler = new $samplerClass($lowerBound, $upperBound, false); - | $sampler.setSeed(${seed}L + partitionIndex); + val sampler = ctx.addMutableState(s"$samplerClass", "sampler", + v => s""" + | $v = new $samplerClass($lowerBound, $upperBound, false); + | $v.setSeed(${seed}L + partitionIndex); """.stripMargin.trim) s""" @@ -363,20 +364,18 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range) protected override def doProduce(ctx: CodegenContext): String = { val numOutput = metricTerm(ctx, "numOutputRows") - val initTerm = ctx.freshName("initRange") - ctx.addMutableState(ctx.JAVA_BOOLEAN, initTerm, s"$initTerm = false;") - val number = ctx.freshName("number") - ctx.addMutableState(ctx.JAVA_LONG, number, s"$number = 0L;") + val initTerm = ctx.addMutableState(ctx.JAVA_BOOLEAN, "initRange") + val number = ctx.addMutableState(ctx.JAVA_LONG, "number") val value = ctx.freshName("value") val ev = ExprCode("", "false", value) val BigInt = classOf[java.math.BigInteger].getName - val taskContext = ctx.freshName("taskContext") - 
ctx.addMutableState("TaskContext", taskContext, s"$taskContext = TaskContext.get();") - val inputMetrics = ctx.freshName("inputMetrics") - ctx.addMutableState("InputMetrics", inputMetrics, - s"$inputMetrics = $taskContext.taskMetrics().inputMetrics();") + // Inline mutable state since not many Range operations in a task + val taskContext = ctx.addMutableState("TaskContext", "taskContext", + v => s"$v = TaskContext.get();", forceInline = true) + val inputMetrics = ctx.addMutableState("InputMetrics", "inputMetrics", + v => s"$v = $taskContext.taskMetrics().inputMetrics();", forceInline = true) // In order to periodically update the metrics without inflicting performance penalty, this // operator produces elements in batches. After a batch is complete, the metrics are updated @@ -386,12 +385,10 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range) // the metrics. // Once number == batchEnd, it's time to progress to the next batch. - val batchEnd = ctx.freshName("batchEnd") - ctx.addMutableState(ctx.JAVA_LONG, batchEnd, s"$batchEnd = 0;") + val batchEnd = ctx.addMutableState(ctx.JAVA_LONG, "batchEnd") // How many values should still be generated by this range operator. - val numElementsTodo = ctx.freshName("numElementsTodo") - ctx.addMutableState(ctx.JAVA_LONG, numElementsTodo, s"$numElementsTodo = 0L;") + val numElementsTodo = ctx.addMutableState(ctx.JAVA_LONG, "numElementsTodo") // How many values should be generated in the next batch. val nextBatchTodo = ctx.freshName("nextBatchTodo") @@ -440,10 +437,6 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range) | } """.stripMargin) - val input = ctx.freshName("input") - // Right now, Range is only used when there is one upstream. - ctx.addMutableState("scala.collection.Iterator", input, s"$input = inputs[0];") - val localIdx = ctx.freshName("localIdx") val localEnd = ctx.freshName("localEnd") val range = ctx.freshName("range") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala index ff5dd707f0b38..4f28eeb725cbb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala @@ -70,7 +70,6 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera val ctx = newCodeGenContext() val numFields = columnTypes.size val (initializeAccessors, extractors) = columnTypes.zipWithIndex.map { case (dt, index) => - val accessorName = ctx.freshName("accessor") val accessorCls = dt match { case NullType => classOf[NullColumnAccessor].getName case BooleanType => classOf[BooleanColumnAccessor].getName @@ -89,7 +88,7 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera case array: ArrayType => classOf[ArrayColumnAccessor].getName case t: MapType => classOf[MapColumnAccessor].getName } - ctx.addMutableState(accessorCls, accessorName) + val accessorName = ctx.addMutableState(accessorCls, "accessor") val createCode = dt match { case t if ctx.isPrimitiveType(dt) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala index a1c62a729900e..51928d914841e 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala @@ -37,8 +37,10 @@ object InMemoryRelation { batchSize: Int, storageLevel: StorageLevel, child: SparkPlan, - tableName: Option[String]): InMemoryRelation = - new InMemoryRelation(child.output, useCompression, batchSize, storageLevel, child, tableName)() + tableName: Option[String], + statsOfPlanToCache: Statistics): InMemoryRelation = + new InMemoryRelation(child.output, useCompression, batchSize, storageLevel, child, tableName)( + statsOfPlanToCache = statsOfPlanToCache) } @@ -60,7 +62,8 @@ case class InMemoryRelation( @transient child: SparkPlan, tableName: Option[String])( @transient var _cachedColumnBuffers: RDD[CachedBatch] = null, - val batchStats: LongAccumulator = child.sqlContext.sparkContext.longAccumulator) + val batchStats: LongAccumulator = child.sqlContext.sparkContext.longAccumulator, + statsOfPlanToCache: Statistics = null) extends logical.LeafNode with MultiInstanceRelation { override protected def innerChildren: Seq[SparkPlan] = Seq(child) @@ -71,9 +74,8 @@ case class InMemoryRelation( override def computeStats(): Statistics = { if (batchStats.value == 0L) { - // Underlying columnar RDD hasn't been materialized, no useful statistics information - // available, return the default statistics. - Statistics(sizeInBytes = child.sqlContext.conf.defaultSizeInBytes) + // Underlying columnar RDD hasn't been materialized, use the stats from the plan to cache + statsOfPlanToCache } else { Statistics(sizeInBytes = batchStats.value.longValue) } @@ -142,7 +144,7 @@ case class InMemoryRelation( def withOutput(newOutput: Seq[Attribute]): InMemoryRelation = { InMemoryRelation( newOutput, useCompression, batchSize, storageLevel, child, tableName)( - _cachedColumnBuffers, batchStats) + _cachedColumnBuffers, batchStats, statsOfPlanToCache) } override def newInstance(): this.type = { @@ -154,11 +156,12 @@ case class InMemoryRelation( child, tableName)( _cachedColumnBuffers, - batchStats).asInstanceOf[this.type] + batchStats, + statsOfPlanToCache).asInstanceOf[this.type] } def cachedColumnBuffers: RDD[CachedBatch] = _cachedColumnBuffers override protected def otherCopyArgs: Seq[AnyRef] = - Seq(_cachedColumnBuffers, batchStats) + Seq(_cachedColumnBuffers, batchStats, statsOfPlanToCache) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala index 3e73393b12850..933b9753faa61 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partition import org.apache.spark.sql.execution.{ColumnarBatchScan, LeafExecNode, WholeStageCodegenExec} import org.apache.spark.sql.execution.vectorized._ import org.apache.spark.sql.types._ +import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector} case class InMemoryTableScanExec( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/NullableColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/NullableColumnAccessor.scala index 2f09757aa341c..341ade1a5c613 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/NullableColumnAccessor.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/NullableColumnAccessor.scala
@@ -35,7 +35,7 @@ private[columnar] trait NullableColumnAccessor extends ColumnAccessor {
     nextNullIndex = if (nullCount > 0) ByteBufferHelper.getInt(nullsBuffer) else -1
     pos = 0
 
-    underlyingBuffer.position(underlyingBuffer.position + 4 + nullCount * 4)
+    underlyingBuffer.position(underlyingBuffer.position() + 4 + nullCount * 4)
     super.initialize()
   }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/compression/compressionSchemes.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/compression/compressionSchemes.scala
index bf00ad997c76e..79dcf3a6105ce 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/compression/compressionSchemes.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/compression/compressionSchemes.scala
@@ -112,7 +112,7 @@ private[columnar] case object PassThrough extends CompressionScheme {
       var nextNullIndex = if (nullCount > 0) ByteBufferHelper.getInt(nullsBuffer) else capacity
       var pos = 0
       var seenNulls = 0
-      var bufferPos = buffer.position
+      var bufferPos = buffer.position()
       while (pos < capacity) {
         if (pos != nextNullIndex) {
           val len = nextNullIndex - pos
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala
index e3bb4d357b395..1122522ccb4cb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala
@@ -143,7 +143,12 @@ case class AnalyzeColumnCommand(
       val percentilesRow = new QueryExecution(sparkSession, Aggregate(Nil, namedExprs, relation))
         .executedPlan.executeTake(1).head
       attrsToGenHistogram.zipWithIndex.foreach { case (attr, i) =>
-        attributePercentiles += attr -> percentilesRow.getArray(i)
+        val percentiles = percentilesRow.getArray(i)
+        // When there is no non-null value, `percentiles` is null. In that case, there is no
+        // need to generate a histogram.
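+        // (This happens, for instance, when every value of the column is null, so the
+        // percentile aggregate has nothing to compute.)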
+ if (percentiles != null) { + attributePercentiles += attr -> percentiles + } } } AttributeMap(attributePercentiles.toSeq) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala index 1a0d67fc71fbc..c27048626c8eb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala @@ -116,8 +116,8 @@ object CommandUtils extends Logging { oldStats: Option[CatalogStatistics], newTotalSize: BigInt, newRowCount: Option[BigInt]): Option[CatalogStatistics] = { - val oldTotalSize = oldStats.map(_.sizeInBytes.toLong).getOrElse(-1L) - val oldRowCount = oldStats.flatMap(_.rowCount.map(_.toLong)).getOrElse(-1L) + val oldTotalSize = oldStats.map(_.sizeInBytes).getOrElse(BigInt(-1)) + val oldRowCount = oldStats.flatMap(_.rowCount).getOrElse(BigInt(-1)) var newStats: Option[CatalogStatistics] = None if (newTotalSize >= 0 && newTotalSize != oldTotalSize) { newStats = Some(CatalogStatistics(sizeInBytes = newTotalSize)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DataWritingCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DataWritingCommand.scala index 2cf06982e25f6..e56f8105fc9a7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DataWritingCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DataWritingCommand.scala @@ -20,30 +20,32 @@ package org.apache.spark.sql.execution.command import org.apache.hadoop.conf.Configuration import org.apache.spark.SparkContext -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan} +import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.datasources.BasicWriteJobStatsTracker +import org.apache.spark.sql.execution.datasources.FileFormatWriter import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} import org.apache.spark.util.SerializableConfiguration - /** - * A special `RunnableCommand` which writes data out and updates metrics. + * A special `Command` which writes data out and updates metrics. */ -trait DataWritingCommand extends RunnableCommand { - +trait DataWritingCommand extends Command { /** * The input query plan that produces the data to be written. + * IMPORTANT: the input query plan MUST be analyzed, so that we can carry its output columns + * to [[FileFormatWriter]]. */ def query: LogicalPlan - // We make the input `query` an inner child instead of a child in order to hide it from the - // optimizer. This is because optimizer may not preserve the output schema names' case, and we - // have to keep the original analyzed plan here so that we can pass the corrected schema to the - // writer. The schema of analyzed plan is what user expects(or specifies), so we should respect - // it when writing. 
-  override protected def innerChildren: Seq[LogicalPlan] = query :: Nil
+  override final def children: Seq[LogicalPlan] = query :: Nil
 
-  override lazy val metrics: Map[String, SQLMetric] = {
+  // Output columns of the analyzed input query plan
+  def outputColumns: Seq[Attribute]
+
+  lazy val metrics: Map[String, SQLMetric] = {
     val sparkContext = SparkContext.getActive.get
     Map(
       "numFiles" -> SQLMetrics.createMetric(sparkContext, "number of written files"),
@@ -57,4 +59,6 @@ trait DataWritingCommand extends RunnableCommand {
     val serializableHadoopConf = new SerializableConfiguration(hadoopConf)
     new BasicWriteJobStatsTracker(serializableHadoopConf, metrics)
   }
+
+  def run(sparkSession: SparkSession, child: SparkPlan): Seq[Row]
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/InsertIntoDataSourceDirCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/InsertIntoDataSourceDirCommand.scala
index 9e3519073303c..1dc24b3d221cf 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/InsertIntoDataSourceDirCommand.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/InsertIntoDataSourceDirCommand.scala
@@ -67,8 +67,7 @@ case class InsertIntoDataSourceDirCommand(
     val saveMode = if (overwrite) SaveMode.Overwrite else SaveMode.ErrorIfExists
     try {
-      sparkSession.sessionState.executePlan(dataSource.planForWriting(saveMode, query))
-      dataSource.writeAndRead(saveMode, query)
+      sparkSession.sessionState.executePlan(dataSource.planForWriting(saveMode, query)).toRdd
     } catch {
       case ex: AnalysisException =>
         logError(s"Failed to write to directory " + storage.locationUri.toString, ex)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
index e28b5eb2e2a2b..2cc0e38adc2ee 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.errors.TreeNodeException
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
 import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan}
-import org.apache.spark.sql.execution.LeafExecNode
+import org.apache.spark.sql.execution.{LeafExecNode, SparkPlan}
 import org.apache.spark.sql.execution.debug._
 import org.apache.spark.sql.execution.metric.SQLMetric
 import org.apache.spark.sql.execution.streaming.{IncrementalExecution, OffsetSeqMetadata}
@@ -87,6 +87,42 @@ case class ExecutedCommandExec(cmd: RunnableCommand) extends LeafExecNode {
   }
 }
 
+/**
+ * A physical operator that executes the run method of a `DataWritingCommand` and
+ * saves the result to prevent multiple executions.
+ *
+ * @param cmd the `DataWritingCommand` this operator will run.
+ * @param child the physical plan child run by the `DataWritingCommand`.
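+ *
+ * Note: the command is executed lazily and at most once, via `sideEffectResult` below, so
+ * `executeCollect`, `executeTake` and `doExecute` all reuse a single run of the command.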
+ */ +case class DataWritingCommandExec(cmd: DataWritingCommand, child: SparkPlan) + extends SparkPlan { + + override lazy val metrics: Map[String, SQLMetric] = cmd.metrics + + protected[sql] lazy val sideEffectResult: Seq[InternalRow] = { + val converter = CatalystTypeConverters.createToCatalystConverter(schema) + val rows = cmd.run(sqlContext.sparkSession, child) + + rows.map(converter(_).asInstanceOf[InternalRow]) + } + + override def children: Seq[SparkPlan] = child :: Nil + + override def output: Seq[Attribute] = cmd.output + + override def nodeName: String = "Execute " + cmd.nodeName + + override def executeCollect(): Array[InternalRow] = sideEffectResult.toArray + + override def executeToIterator: Iterator[InternalRow] = sideEffectResult.toIterator + + override def executeTake(limit: Int): Array[InternalRow] = sideEffectResult.take(limit).toArray + + protected override def doExecute(): RDD[InternalRow] = { + sqlContext.sparkContext.parallelize(sideEffectResult, 1) + } +} + /** * An explain command for users to see how a command will be executed. * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 568567aa8ea88..0142f17ce62e2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -203,14 +203,20 @@ case class DropTableCommand( case _ => } } - try { - sparkSession.sharedState.cacheManager.uncacheQuery(sparkSession.table(tableName)) - } catch { - case _: NoSuchTableException if ifExists => - case NonFatal(e) => log.warn(e.toString, e) + + if (catalog.isTemporaryTable(tableName) || catalog.tableExists(tableName)) { + try { + sparkSession.sharedState.cacheManager.uncacheQuery(sparkSession.table(tableName)) + } catch { + case NonFatal(e) => log.warn(e.toString, e) + } + catalog.refreshTable(tableName) + catalog.dropTable(tableName, ifExists, purge) + } else if (ifExists) { + // no-op + } else { + throw new AnalysisException(s"Table or view not found: ${tableName.identifier}") } - catalog.refreshTable(tableName) - catalog.dropTable(tableName, ifExists, purge) Seq.empty[Row] } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index c9f6e571ddab3..e400975f19708 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -40,6 +40,7 @@ import org.apache.spark.sql.execution.datasources.{DataSource, PartitioningUtils import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat import org.apache.spark.sql.execution.datasources.json.JsonFileFormat import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils import org.apache.spark.util.Utils @@ -190,7 +191,7 @@ case class AlterTableAddColumnsCommand( colsToAdd: Seq[StructField]) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { val catalog = sparkSession.sessionState.catalog - val catalogTable = verifyAlterTableAddColumn(catalog, table) + val catalogTable = verifyAlterTableAddColumn(sparkSession.sessionState.conf, catalog, table) try { sparkSession.catalog.uncacheTable(table.quotedString) @@ -216,6 +217,7 @@ 
case class AlterTableAddColumnsCommand( * For datasource table, it currently only supports parquet, json, csv. */ private def verifyAlterTableAddColumn( + conf: SQLConf, catalog: SessionCatalog, table: TableIdentifier): CatalogTable = { val catalogTable = catalog.getTempViewOrPermanentTableMetadata(table) @@ -229,7 +231,7 @@ case class AlterTableAddColumnsCommand( } if (DDLUtils.isDatasourceTable(catalogTable)) { - DataSource.lookupDataSource(catalogTable.provider.get).newInstance() match { + DataSource.lookupDataSource(catalogTable.provider.get, conf).newInstance() match { // For datasource table, this command can only support the following File format. // TextFileFormat only default to one column "value" // Hive type is already considered as hive serde table, so the logic will not @@ -340,7 +342,7 @@ case class LoadDataCommand( uri } else { val uri = new URI(path) - if (uri.getScheme() != null && uri.getAuthority() != null) { + val hdfsUri = if (uri.getScheme() != null && uri.getAuthority() != null) { uri } else { // Follow Hive's behavior: @@ -380,6 +382,13 @@ case class LoadDataCommand( } new URI(scheme, authority, absolutePath, uri.getQuery(), uri.getFragment()) } + val hadoopConf = sparkSession.sessionState.newHadoopConf() + val srcPath = new Path(hdfsUri) + val fs = srcPath.getFileSystem(hadoopConf) + if (!fs.exists(srcPath)) { + throw new AnalysisException(s"LOAD DATA input path does not exist: $path") + } + hdfsUri } if (partition.nonEmpty) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala index 11af0aaa7b206..9dbbe9946ee99 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala @@ -22,7 +22,7 @@ import java.io.FileNotFoundException import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.spark.SparkContext +import org.apache.spark.{SparkContext, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.SQLExecution @@ -44,7 +44,6 @@ case class BasicWriteTaskStats( /** * Simple [[WriteTaskStatsTracker]] implementation that produces [[BasicWriteTaskStats]]. - * @param hadoopConf */ class BasicWriteTaskStatsTracker(hadoopConf: Configuration) extends WriteTaskStatsTracker with Logging { @@ -106,6 +105,13 @@ class BasicWriteTaskStatsTracker(hadoopConf: Configuration) override def getFinalStats(): WriteTaskStats = { statCurrentFile() + + // Reports bytesWritten and recordsWritten to the Spark output metrics. + Option(TaskContext.get()).map(_.taskMetrics().outputMetrics).foreach { outputMetrics => + outputMetrics.setBytesWritten(numBytes) + outputMetrics.setRecordsWritten(numRows) + } + if (submittedFiles != numFiles) { logInfo(s"Expected $submittedFiles files, but only saw $numFiles. 
" + "This could be due to the output format not writing empty files, " + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index b43d282bd434c..25e1210504273 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -36,8 +36,10 @@ import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat import org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider import org.apache.spark.sql.execution.datasources.json.JsonFileFormat +import org.apache.spark.sql.execution.datasources.orc.OrcFileFormat import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources._ import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.{CalendarIntervalType, StructType} @@ -85,7 +87,8 @@ case class DataSource( case class SourceInfo(name: String, schema: StructType, partitionColumns: Seq[String]) - lazy val providingClass: Class[_] = DataSource.lookupDataSource(className) + lazy val providingClass: Class[_] = + DataSource.lookupDataSource(className, sparkSession.sessionState.conf) lazy val sourceInfo: SourceInfo = sourceSchema() private val caseInsensitiveOptions = CaseInsensitiveMap(options) private val equality = sparkSession.sessionState.conf.resolver @@ -453,17 +456,6 @@ case class DataSource( val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis PartitioningUtils.validatePartitionColumn(data.schema, partitionColumns, caseSensitive) - - // SPARK-17230: Resolve the partition columns so InsertIntoHadoopFsRelationCommand does - // not need to have the query as child, to avoid to analyze an optimized query, - // because InsertIntoHadoopFsRelationCommand will be optimized first. 
- val partitionAttributes = partitionColumns.map { name => - data.output.find(a => equality(a.name, name)).getOrElse { - throw new AnalysisException( - s"Unable to resolve $name given [${data.output.map(_.name).mkString(", ")}]") - } - } - val fileIndex = catalogTable.map(_.identifier).map { tableIdent => sparkSession.table(tableIdent).queryExecution.analyzed.collect { case LogicalRelation(t: HadoopFsRelation, _, _, _) => t.location @@ -476,14 +468,15 @@ case class DataSource( outputPath = outputPath, staticPartitions = Map.empty, ifPartitionNotExists = false, - partitionColumns = partitionAttributes, + partitionColumns = partitionColumns.map(UnresolvedAttribute.quoted), bucketSpec = bucketSpec, fileFormat = format, options = options, query = data, mode = mode, catalogTable = catalogTable, - fileIndex = fileIndex) + fileIndex = fileIndex, + outputColumns = data.output) } /** @@ -537,6 +530,7 @@ object DataSource extends Logging { val csv = classOf[CSVFileFormat].getCanonicalName val libsvm = "org.apache.spark.ml.source.libsvm.LibSVMFileFormat" val orc = "org.apache.spark.sql.hive.orc.OrcFileFormat" + val nativeOrc = classOf[OrcFileFormat].getCanonicalName Map( "org.apache.spark.sql.jdbc" -> jdbc, @@ -553,6 +547,8 @@ object DataSource extends Logging { "org.apache.spark.sql.execution.datasources.parquet.DefaultSource" -> parquet, "org.apache.spark.sql.hive.orc.DefaultSource" -> orc, "org.apache.spark.sql.hive.orc" -> orc, + "org.apache.spark.sql.execution.datasources.orc.DefaultSource" -> nativeOrc, + "org.apache.spark.sql.execution.datasources.orc" -> nativeOrc, "org.apache.spark.ml.source.libsvm.DefaultSource" -> libsvm, "org.apache.spark.ml.source.libsvm" -> libsvm, "com.databricks.spark.csv" -> csv @@ -568,8 +564,16 @@ object DataSource extends Logging { "org.apache.spark.Logging") /** Given a provider name, look up the data source class definition. */ - def lookupDataSource(provider: String): Class[_] = { - val provider1 = backwardCompatibilityMap.getOrElse(provider, provider) + def lookupDataSource(provider: String, conf: SQLConf): Class[_] = { + val provider1 = backwardCompatibilityMap.getOrElse(provider, provider) match { + case name if name.equalsIgnoreCase("orc") && + conf.getConf(SQLConf.ORC_IMPLEMENTATION) == "native" => + classOf[OrcFileFormat].getCanonicalName + case name if name.equalsIgnoreCase("orc") && + conf.getConf(SQLConf.ORC_IMPLEMENTATION) == "hive" => + "org.apache.spark.sql.hive.orc.OrcFileFormat" + case name => name + } val provider2 = s"$provider1.DefaultSource" val loader = Utils.getContextOrSparkClassLoader val serviceLoader = ServiceLoader.load(classOf[DataSourceRegister], loader) @@ -584,10 +588,11 @@ object DataSource extends Logging { // Found the data source using fully qualified path dataSource case Failure(error) => - if (provider1.toLowerCase(Locale.ROOT) == "orc" || - provider1.startsWith("org.apache.spark.sql.hive.orc")) { + if (provider1.startsWith("org.apache.spark.sql.hive.orc")) { throw new AnalysisException( - "The ORC data source must be used with Hive support enabled") + "Hive built-in ORC data source must be used with Hive support enabled. 
" + + "Please use the native ORC data source by setting 'spark.sql.orc.impl' to " + + "'native'") } else if (provider1.toLowerCase(Locale.ROOT) == "avro" || provider1 == "com.databricks.spark.avro") { throw new AnalysisException( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 400f2e03165b2..d94c5bbccdd84 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -208,7 +208,8 @@ case class DataSourceAnalysis(conf: SQLConf) extends Rule[LogicalPlan] with Cast actualQuery, mode, table, - Some(t.location)) + Some(t.location), + actualQuery.output) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala index dfe752ce796b1..2fd722e5639db 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala @@ -78,7 +78,7 @@ trait FileFormat { } /** - * Returns whether a file with `path` could be splitted or not. + * Returns whether a file with `path` could be split or not. */ def isSplitable( sparkSession: SparkSession, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala index 1fac01a2c26c6..1d80a69bc5a1d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala @@ -39,7 +39,7 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{UnsafeProjection, _} import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} -import org.apache.spark.sql.execution.{QueryExecution, SortExec, SQLExecution} +import org.apache.spark.sql.execution.{SortExec, SparkPlan, SQLExecution} import org.apache.spark.sql.types.StringType import org.apache.spark.util.{SerializableConfiguration, Utils} @@ -56,7 +56,9 @@ object FileFormatWriter extends Logging { /** Describes how output files should be placed in the filesystem. */ case class OutputSpec( - outputPath: String, customPartitionLocations: Map[TablePartitionSpec, String]) + outputPath: String, + customPartitionLocations: Map[TablePartitionSpec, String], + outputColumns: Seq[Attribute]) /** A shared job description for all the write tasks. */ private class WriteJobDescription( @@ -101,7 +103,7 @@ object FileFormatWriter extends Logging { */ def write( sparkSession: SparkSession, - queryExecution: QueryExecution, + plan: SparkPlan, fileFormat: FileFormat, committer: FileCommitProtocol, outputSpec: OutputSpec, @@ -117,11 +119,8 @@ object FileFormatWriter extends Logging { job.setOutputValueClass(classOf[InternalRow]) FileOutputFormat.setOutputPath(job, new Path(outputSpec.outputPath)) - // Pick the attributes from analyzed plan, as optimizer may not preserve the output schema - // names' case. 
- val allColumns = queryExecution.analyzed.output val partitionSet = AttributeSet(partitionColumns) - val dataColumns = allColumns.filterNot(partitionSet.contains) + val dataColumns = outputSpec.outputColumns.filterNot(partitionSet.contains) val bucketIdExpression = bucketSpec.map { spec => val bucketColumns = spec.bucketColumnNames.map(c => dataColumns.find(_.name == c).get) @@ -144,7 +143,7 @@ object FileFormatWriter extends Logging { uuid = UUID.randomUUID().toString, serializableHadoopConf = new SerializableConfiguration(job.getConfiguration), outputWriterFactory = outputWriterFactory, - allColumns = allColumns, + allColumns = outputSpec.outputColumns, dataColumns = dataColumns, partitionColumns = partitionColumns, bucketIdExpression = bucketIdExpression, @@ -160,7 +159,7 @@ object FileFormatWriter extends Logging { // We should first sort by partition columns, then bucket id, and finally sorting columns. val requiredOrdering = partitionColumns ++ bucketIdExpression ++ sortColumns // the sort order doesn't matter - val actualOrdering = queryExecution.executedPlan.outputOrdering.map(_.child) + val actualOrdering = plan.outputOrdering.map(_.child) val orderingMatched = if (requiredOrdering.length > actualOrdering.length) { false } else { @@ -178,17 +177,18 @@ object FileFormatWriter extends Logging { try { val rdd = if (orderingMatched) { - queryExecution.toRdd + plan.execute() } else { // SPARK-21165: the `requiredOrdering` is based on the attributes from analyzed plan, and // the physical plan may have different attribute ids due to optimizer removing some // aliases. Here we bind the expression ahead to avoid potential attribute ids mismatch. val orderingExpr = requiredOrdering - .map(SortOrder(_, Ascending)).map(BindReferences.bindReference(_, allColumns)) + .map(SortOrder(_, Ascending)) + .map(BindReferences.bindReference(_, outputSpec.outputColumns)) SortExec( orderingExpr, global = false, - child = queryExecution.executedPlan).execute() + child = plan).execute() } val ret = new Array[WriteTaskResult](rdd.partitions.length) sparkSession.sparkContext.runJob( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala index 8731ee88f87f2..835ce98462477 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala @@ -26,7 +26,7 @@ import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.rdd.{InputFileBlockHolder, RDD} import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.execution.vectorized.ColumnarBatch +import org.apache.spark.sql.vectorized.ColumnarBatch import org.apache.spark.util.NextIterator /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFileWholeTextReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFileWholeTextReader.scala new file mode 100644 index 0000000000000..c61a89e6e8c3f --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFileWholeTextReader.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources + +import java.io.Closeable +import java.net.URI + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.hadoop.io.Text +import org.apache.hadoop.mapreduce._ +import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit +import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl + +import org.apache.spark.input.WholeTextFileRecordReader + +/** + * An adaptor from a [[PartitionedFile]] to an [[Iterator]] of [[Text]], which is all of the lines + * in that file. + */ +class HadoopFileWholeTextReader(file: PartitionedFile, conf: Configuration) + extends Iterator[Text] with Closeable { + private val iterator = { + val fileSplit = new CombineFileSplit( + Array(new Path(new URI(file.filePath))), + Array(file.start), + Array(file.length), + // TODO: Implement Locality + Array.empty[String]) + val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0) + val hadoopAttemptContext = new TaskAttemptContextImpl(conf, attemptId) + val reader = new WholeTextFileRecordReader(fileSplit, hadoopAttemptContext, 0) + reader.initialize(fileSplit, hadoopAttemptContext) + new RecordReaderIterator(reader) + } + + override def hasNext: Boolean = iterator.hasNext + + override def next(): Text = iterator.next() + + override def close(): Unit = iterator.close() +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelation.scala index 89d8a85a9cbd2..6b34638529770 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelation.scala @@ -82,7 +82,11 @@ case class HadoopFsRelation( } } - override def sizeInBytes: Long = location.sizeInBytes + override def sizeInBytes: Long = { + val compressionFactor = sqlContext.conf.fileCompressionFactor + (location.sizeInBytes * compressionFactor).toLong + } + override def inputFiles: Array[String] = location.inputFiles } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala index 675bee85bf61e..dd7ef0d15c140 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala @@ -27,7 +27,9 @@ import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable, CatalogT import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import 
org.apache.spark.sql.execution.SparkPlan
 import org.apache.spark.sql.execution.command._
+import org.apache.spark.sql.internal.SQLConf.PartitionOverwriteMode
 import org.apache.spark.sql.util.SchemaUtils

 /**
@@ -52,11 +54,12 @@ case class InsertIntoHadoopFsRelationCommand(
     query: LogicalPlan,
     mode: SaveMode,
     catalogTable: Option[CatalogTable],
-    fileIndex: Option[FileIndex])
+    fileIndex: Option[FileIndex],
+    outputColumns: Seq[Attribute])
   extends DataWritingCommand {
   import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.escapePathName

-  override def run(sparkSession: SparkSession): Seq[Row] = {
+  override def run(sparkSession: SparkSession, child: SparkPlan): Seq[Row] = {
     // Most formats don't do well with duplicate columns, so let's not allow that
     SchemaUtils.checkSchemaColumnNameDuplication(
       query.schema,
@@ -87,13 +90,19 @@ case class InsertIntoHadoopFsRelationCommand(
     }

     val pathExists = fs.exists(qualifiedOutputPath)
-    // If we are appending data to an existing dir.
-    val isAppend = pathExists && (mode == SaveMode.Append)
+
+    val enableDynamicOverwrite =
+      sparkSession.sessionState.conf.partitionOverwriteMode == PartitionOverwriteMode.DYNAMIC
+    // This config only makes sense when we are overwriting a partitioned dataset with dynamic
+    // partition columns.
+    val dynamicPartitionOverwrite = enableDynamicOverwrite && mode == SaveMode.Overwrite &&
+      staticPartitions.size < partitionColumns.length

     val committer = FileCommitProtocol.instantiate(
       sparkSession.sessionState.conf.fileCommitProtocolClass,
       jobId = java.util.UUID.randomUUID().toString,
-      outputPath = outputPath.toString)
+      outputPath = outputPath.toString,
+      dynamicPartitionOverwrite = dynamicPartitionOverwrite)

     val doInsertion = (mode, pathExists) match {
       case (SaveMode.ErrorIfExists, true) =>
@@ -101,6 +110,9 @@ case class InsertIntoHadoopFsRelationCommand(
       case (SaveMode.Overwrite, true) =>
         if (ifPartitionNotExists && matchingPartitions.nonEmpty) {
           false
+        } else if (dynamicPartitionOverwrite) {
+          // For dynamic partition overwrite, do not delete partition directories ahead of time.
+          true
         } else {
           deleteMatchingPartitions(fs, qualifiedOutputPath, customPartitionLocations, committer)
           true
@@ -124,7 +136,9 @@ case class InsertIntoHadoopFsRelationCommand(
           catalogTable.get.identifier, newPartitions.toSeq.map(p => (p, None)),
           ifNotExists = true).run(sparkSession)
       }
-      if (mode == SaveMode.Overwrite) {
+      // For dynamic partition overwrite, we never remove partitions but only update existing
+      // ones.
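
A minimal sketch of the decision above, i.e. when this command switches to dynamic partition overwrite. The types and the string-valued mode are simplified stand-ins; the real check reads `spark.sql.sources.partitionOverwriteMode` from the session conf:

object DynamicOverwriteDecision {
  sealed trait Mode
  case object Append extends Mode
  case object Overwrite extends Mode

  // True only when overwriting and at least one partition column is left dynamic.
  def dynamicPartitionOverwrite(
      overwriteMode: String, // "STATIC" or "DYNAMIC"
      mode: Mode,
      staticPartitions: Int, // partition values pinned in the INSERT, e.g. PARTITION(p=1)
      partitionColumns: Int): Boolean = {
    overwriteMode == "DYNAMIC" && mode == Overwrite && staticPartitions < partitionColumns
  }

  def main(args: Array[String]): Unit = {
    // A fully static INSERT OVERWRITE pins every partition, so nothing is dynamic.
    assert(!dynamicPartitionOverwrite("DYNAMIC", Overwrite, 2, 2))
    // One unpinned partition column is enough to switch the commit path.
    assert(dynamicPartitionOverwrite("DYNAMIC", Overwrite, 1, 2))
    assert(!dynamicPartitionOverwrite("STATIC", Overwrite, 1, 2))
    assert(!dynamicPartitionOverwrite("DYNAMIC", Append, 1, 2))
  }
}
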
+ if (mode == SaveMode.Overwrite && !dynamicPartitionOverwrite) { val deletedPartitions = initialMatchingPartitions.toSet -- updatedPartitions if (deletedPartitions.nonEmpty) { AlterTableDropPartitionCommand( @@ -139,11 +153,11 @@ case class InsertIntoHadoopFsRelationCommand( val updatedPartitionPaths = FileFormatWriter.write( sparkSession = sparkSession, - queryExecution = Dataset.ofRows(sparkSession, query).queryExecution, + plan = child, fileFormat = fileFormat, committer = committer, outputSpec = FileFormatWriter.OutputSpec( - qualifiedOutputPath.toString, customPartitionLocations), + qualifiedOutputPath.toString, customPartitionLocations, outputColumns), hadoopConf = hadoopConf, partitionColumns = partitionColumns, bucketSpec = bucketSpec, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SQLHadoopMapReduceCommitProtocol.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SQLHadoopMapReduceCommitProtocol.scala index 40825a1f724b1..39c594a9bc618 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SQLHadoopMapReduceCommitProtocol.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SQLHadoopMapReduceCommitProtocol.scala @@ -29,11 +29,15 @@ import org.apache.spark.sql.internal.SQLConf * A variant of [[HadoopMapReduceCommitProtocol]] that allows specifying the actual * Hadoop output committer using an option specified in SQLConf. */ -class SQLHadoopMapReduceCommitProtocol(jobId: String, path: String) - extends HadoopMapReduceCommitProtocol(jobId, path) with Serializable with Logging { +class SQLHadoopMapReduceCommitProtocol( + jobId: String, + path: String, + dynamicPartitionOverwrite: Boolean = false) + extends HadoopMapReduceCommitProtocol(jobId, path, dynamicPartitionOverwrite) + with Serializable with Logging { override protected def setupCommitter(context: TaskAttemptContext): OutputCommitter = { - var committer = context.getOutputFormatClass.newInstance().getOutputCommitter(context) + var committer = super.setupCommitter(context) val configuration = context.getConfiguration val clazz = diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala index b64d71bb4eef2..a585cbed2551b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala @@ -150,7 +150,7 @@ private[csv] object CSVInferSchema { if ((allCatch opt options.timestampFormat.parse(field)).isDefined) { TimestampType } else if ((allCatch opt DateTimeUtils.stringToTime(field)).isDefined) { - // We keep this for backwords competibility. + // We keep this for backwards compatibility. 
TimestampType } else { tryParseBoolean(field, options) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala index a13a5a34b4a84..c16790630ce17 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala @@ -89,6 +89,14 @@ class CSVOptions( val quote = getChar("quote", '\"') val escape = getChar("escape", '\\') + val charToEscapeQuoteEscaping = parameters.get("charToEscapeQuoteEscaping") match { + case None => None + case Some(null) => None + case Some(value) if value.length == 0 => None + case Some(value) if value.length == 1 => Some(value.charAt(0)) + case _ => + throw new RuntimeException("charToEscapeQuoteEscaping cannot be more than one character") + } val comment = getChar("comment", '\u0000') val headerFlag = getBool("header") @@ -148,6 +156,7 @@ class CSVOptions( format.setDelimiter(delimiter) format.setQuote(quote) format.setQuoteEscape(escape) + charToEscapeQuoteEscaping.foreach(format.setCharToEscapeQuoteEscaping) format.setComment(comment) writerSettings.setIgnoreLeadingWhitespaces(ignoreLeadingWhiteSpaceFlagInWrite) writerSettings.setIgnoreTrailingWhitespaces(ignoreTrailingWhiteSpaceFlagInWrite) @@ -165,6 +174,7 @@ class CSVOptions( format.setDelimiter(delimiter) format.setQuote(quote) format.setQuoteEscape(escape) + charToEscapeQuoteEscaping.foreach(format.setCharToEscapeQuoteEscaping) format.setComment(comment) settings.setIgnoreLeadingWhitespaces(ignoreLeadingWhiteSpaceInRead) settings.setIgnoreTrailingWhitespaces(ignoreTrailingWhiteSpaceInRead) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcRelationProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcRelationProvider.scala index 37e7bb0a59bb6..cc506e51bd0c6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcRelationProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcRelationProvider.scala @@ -68,7 +68,7 @@ class JdbcRelationProvider extends CreatableRelationProvider case SaveMode.Overwrite => if (options.isTruncate && isCascadingTruncateTable(options.url) == Some(false)) { // In this case, we should truncate table and then load. - truncateTable(conn, options.table) + truncateTable(conn, options) val tableSchema = JdbcUtils.getSchemaOption(conn, options) saveTable(df, tableSchema, isCaseSensitive, options) } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 75c94fc486493..e6dc2fda4eb1b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -96,12 +96,13 @@ object JdbcUtils extends Logging { } /** - * Truncates a table from the JDBC database. + * Truncates a table from the JDBC database without side effects. 
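+   * For example, a dialect may emit `TRUNCATE TABLE ONLY tbl` so that, on PostgreSQL,
+   * descendant tables are left untouched, where a plain `TRUNCATE TABLE tbl` would cascade
+   * to them.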
   */
-  def truncateTable(conn: Connection, table: String): Unit = {
+  def truncateTable(conn: Connection, options: JDBCOptions): Unit = {
+    val dialect = JdbcDialects.get(options.url)
     val statement = conn.createStatement
     try {
-      statement.executeUpdate(s"TRUNCATE TABLE $table")
+      statement.executeUpdate(dialect.getTruncateQuery(options.table))
     } finally {
       statement.close()
     }
@@ -226,10 +227,10 @@ object JdbcUtils extends Logging {
     case java.sql.Types.STRUCT => StringType
     case java.sql.Types.TIME => TimestampType
     case java.sql.Types.TIME_WITH_TIMEZONE
-    => TimestampType
+    => null
     case java.sql.Types.TIMESTAMP => TimestampType
     case java.sql.Types.TIMESTAMP_WITH_TIMEZONE
-    => TimestampType
+    => null
     case java.sql.Types.TINYINT => IntegerType
     case java.sql.Types.VARBINARY => BinaryType
     case java.sql.Types.VARCHAR => StringType
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala
new file mode 100644
index 0000000000000..4ecc54bd2fd96
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala
@@ -0,0 +1,243 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.orc
+
+import org.apache.hadoop.io._
+import org.apache.orc.mapred.{OrcList, OrcMap, OrcStruct, OrcTimestamp}
+import org.apache.orc.storage.serde2.io.{DateWritable, HiveDecimalWritable}
+
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.{SpecificInternalRow, UnsafeArrayData}
+import org.apache.spark.sql.catalyst.util._
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
+
+/**
+ * A deserializer to deserialize ORC structs to Spark rows.
+ */
+class OrcDeserializer(
+    dataSchema: StructType,
+    requiredSchema: StructType,
+    requestedColIds: Array[Int]) {
+
+  private val resultRow = new SpecificInternalRow(requiredSchema.map(_.dataType))
+
+  private val fieldWriters: Array[WritableComparable[_] => Unit] = {
+    requiredSchema.zipWithIndex
+      // The values of missing columns are always null, so they do not need writers.
+ .filterNot { case (_, index) => requestedColIds(index) == -1 } + .map { case (f, index) => + val writer = newWriter(f.dataType, new RowUpdater(resultRow)) + (value: WritableComparable[_]) => writer(index, value) + }.toArray + } + + private val validColIds = requestedColIds.filterNot(_ == -1) + + def deserialize(orcStruct: OrcStruct): InternalRow = { + var i = 0 + while (i < validColIds.length) { + val value = orcStruct.getFieldValue(validColIds(i)) + if (value == null) { + resultRow.setNullAt(i) + } else { + fieldWriters(i)(value) + } + i += 1 + } + resultRow + } + + /** + * Creates a writer to write ORC values to Catalyst data structure at the given ordinal. + */ + private def newWriter( + dataType: DataType, updater: CatalystDataUpdater): (Int, WritableComparable[_]) => Unit = + dataType match { + case NullType => (ordinal, _) => + updater.setNullAt(ordinal) + + case BooleanType => (ordinal, value) => + updater.setBoolean(ordinal, value.asInstanceOf[BooleanWritable].get) + + case ByteType => (ordinal, value) => + updater.setByte(ordinal, value.asInstanceOf[ByteWritable].get) + + case ShortType => (ordinal, value) => + updater.setShort(ordinal, value.asInstanceOf[ShortWritable].get) + + case IntegerType => (ordinal, value) => + updater.setInt(ordinal, value.asInstanceOf[IntWritable].get) + + case LongType => (ordinal, value) => + updater.setLong(ordinal, value.asInstanceOf[LongWritable].get) + + case FloatType => (ordinal, value) => + updater.setFloat(ordinal, value.asInstanceOf[FloatWritable].get) + + case DoubleType => (ordinal, value) => + updater.setDouble(ordinal, value.asInstanceOf[DoubleWritable].get) + + case StringType => (ordinal, value) => + updater.set(ordinal, UTF8String.fromBytes(value.asInstanceOf[Text].copyBytes)) + + case BinaryType => (ordinal, value) => + val binary = value.asInstanceOf[BytesWritable] + val bytes = new Array[Byte](binary.getLength) + System.arraycopy(binary.getBytes, 0, bytes, 0, binary.getLength) + updater.set(ordinal, bytes) + + case DateType => (ordinal, value) => + updater.setInt(ordinal, DateTimeUtils.fromJavaDate(value.asInstanceOf[DateWritable].get)) + + case TimestampType => (ordinal, value) => + updater.setLong(ordinal, DateTimeUtils.fromJavaTimestamp(value.asInstanceOf[OrcTimestamp])) + + case DecimalType.Fixed(precision, scale) => (ordinal, value) => + val decimal = value.asInstanceOf[HiveDecimalWritable].getHiveDecimal() + val v = Decimal(decimal.bigDecimalValue, decimal.precision(), decimal.scale()) + v.changePrecision(precision, scale) + updater.set(ordinal, v) + + case st: StructType => (ordinal, value) => + val result = new SpecificInternalRow(st) + val fieldUpdater = new RowUpdater(result) + val fieldConverters = st.map(_.dataType).map { dt => + newWriter(dt, fieldUpdater) + }.toArray + val orcStruct = value.asInstanceOf[OrcStruct] + + var i = 0 + while (i < st.length) { + val value = orcStruct.getFieldValue(i) + if (value == null) { + result.setNullAt(i) + } else { + fieldConverters(i)(i, value) + } + i += 1 + } + + updater.set(ordinal, result) + + case ArrayType(elementType, _) => (ordinal, value) => + val orcArray = value.asInstanceOf[OrcList[WritableComparable[_]]] + val length = orcArray.size() + val result = createArrayData(elementType, length) + val elementUpdater = new ArrayDataUpdater(result) + val elementConverter = newWriter(elementType, elementUpdater) + + var i = 0 + while (i < length) { + val value = orcArray.get(i) + if (value == null) { + result.setNullAt(i) + } else { + elementConverter(i, value) + } + i += 1 + } + + 
updater.set(ordinal, result)
+
+    case MapType(keyType, valueType, _) => (ordinal, value) =>
+      val orcMap = value.asInstanceOf[OrcMap[WritableComparable[_], WritableComparable[_]]]
+      val length = orcMap.size()
+      val keyArray = createArrayData(keyType, length)
+      val keyUpdater = new ArrayDataUpdater(keyArray)
+      val keyConverter = newWriter(keyType, keyUpdater)
+      val valueArray = createArrayData(valueType, length)
+      val valueUpdater = new ArrayDataUpdater(valueArray)
+      val valueConverter = newWriter(valueType, valueUpdater)
+
+      var i = 0
+      val it = orcMap.entrySet().iterator()
+      while (it.hasNext) {
+        val entry = it.next()
+        keyConverter(i, entry.getKey)
+        val value = entry.getValue
+        if (value == null) {
+          valueArray.setNullAt(i)
+        } else {
+          valueConverter(i, value)
+        }
+        i += 1
+      }
+
+      updater.set(ordinal, new ArrayBasedMapData(keyArray, valueArray))
+
+    case udt: UserDefinedType[_] => newWriter(udt.sqlType, updater)
+
+    case _ =>
+      throw new UnsupportedOperationException(s"$dataType is not supported yet.")
+  }
+
+  private def createArrayData(elementType: DataType, length: Int): ArrayData = elementType match {
+    case BooleanType => UnsafeArrayData.fromPrimitiveArray(new Array[Boolean](length))
+    case ByteType => UnsafeArrayData.fromPrimitiveArray(new Array[Byte](length))
+    case ShortType => UnsafeArrayData.fromPrimitiveArray(new Array[Short](length))
+    case IntegerType => UnsafeArrayData.fromPrimitiveArray(new Array[Int](length))
+    case LongType => UnsafeArrayData.fromPrimitiveArray(new Array[Long](length))
+    case FloatType => UnsafeArrayData.fromPrimitiveArray(new Array[Float](length))
+    case DoubleType => UnsafeArrayData.fromPrimitiveArray(new Array[Double](length))
+    case _ => new GenericArrayData(new Array[Any](length))
+  }
+
+  /**
+   * A base interface for updating values inside Catalyst data structures like `InternalRow` and
+   * `ArrayData`.
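+   *
+   * Sharing this interface between `RowUpdater` and `ArrayDataUpdater` lets `newWriter` build
+   * one writer per data type and route its output to a row, a struct field, an array element,
+   * or a map entry through the same `set` calls.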
+ */ + sealed trait CatalystDataUpdater { + def set(ordinal: Int, value: Any): Unit + + def setNullAt(ordinal: Int): Unit = set(ordinal, null) + def setBoolean(ordinal: Int, value: Boolean): Unit = set(ordinal, value) + def setByte(ordinal: Int, value: Byte): Unit = set(ordinal, value) + def setShort(ordinal: Int, value: Short): Unit = set(ordinal, value) + def setInt(ordinal: Int, value: Int): Unit = set(ordinal, value) + def setLong(ordinal: Int, value: Long): Unit = set(ordinal, value) + def setDouble(ordinal: Int, value: Double): Unit = set(ordinal, value) + def setFloat(ordinal: Int, value: Float): Unit = set(ordinal, value) + } + + final class RowUpdater(row: InternalRow) extends CatalystDataUpdater { + override def setNullAt(ordinal: Int): Unit = row.setNullAt(ordinal) + override def set(ordinal: Int, value: Any): Unit = row.update(ordinal, value) + + override def setBoolean(ordinal: Int, value: Boolean): Unit = row.setBoolean(ordinal, value) + override def setByte(ordinal: Int, value: Byte): Unit = row.setByte(ordinal, value) + override def setShort(ordinal: Int, value: Short): Unit = row.setShort(ordinal, value) + override def setInt(ordinal: Int, value: Int): Unit = row.setInt(ordinal, value) + override def setLong(ordinal: Int, value: Long): Unit = row.setLong(ordinal, value) + override def setDouble(ordinal: Int, value: Double): Unit = row.setDouble(ordinal, value) + override def setFloat(ordinal: Int, value: Float): Unit = row.setFloat(ordinal, value) + } + + final class ArrayDataUpdater(array: ArrayData) extends CatalystDataUpdater { + override def setNullAt(ordinal: Int): Unit = array.setNullAt(ordinal) + override def set(ordinal: Int, value: Any): Unit = array.update(ordinal, value) + + override def setBoolean(ordinal: Int, value: Boolean): Unit = array.setBoolean(ordinal, value) + override def setByte(ordinal: Int, value: Byte): Unit = array.setByte(ordinal, value) + override def setShort(ordinal: Int, value: Short): Unit = array.setShort(ordinal, value) + override def setInt(ordinal: Int, value: Int): Unit = array.setInt(ordinal, value) + override def setLong(ordinal: Int, value: Long): Unit = array.setLong(ordinal, value) + override def setDouble(ordinal: Int, value: Double): Unit = array.setDouble(ordinal, value) + override def setFloat(ordinal: Int, value: Float): Unit = array.setFloat(ordinal, value) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala index 215740e90fe84..2dd314d165348 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala @@ -17,10 +17,31 @@ package org.apache.spark.sql.execution.datasources.orc -import org.apache.orc.TypeDescription +import java.io._ +import java.net.URI +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.hadoop.mapred.JobConf +import org.apache.hadoop.mapreduce._ +import org.apache.hadoop.mapreduce.lib.input.FileSplit +import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl +import org.apache.orc._ +import org.apache.orc.OrcConf.{COMPRESS, MAPRED_OUTPUT_SCHEMA} +import org.apache.orc.mapred.OrcStruct +import org.apache.orc.mapreduce._ + +import org.apache.spark.TaskContext import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.types.StructType +import 
org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection +import org.apache.spark.sql.execution.datasources._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.sources._ +import org.apache.spark.sql.types._ +import org.apache.spark.util.SerializableConfiguration private[sql] object OrcFileFormat { private def checkFieldName(name: String): Unit = { @@ -39,3 +60,163 @@ private[sql] object OrcFileFormat { names.foreach(checkFieldName) } } + +/** + * New ORC File Format based on Apache ORC. + */ +class OrcFileFormat + extends FileFormat + with DataSourceRegister + with Serializable { + + override def shortName(): String = "orc" + + override def toString: String = "ORC" + + override def hashCode(): Int = getClass.hashCode() + + override def equals(other: Any): Boolean = other.isInstanceOf[OrcFileFormat] + + override def inferSchema( + sparkSession: SparkSession, + options: Map[String, String], + files: Seq[FileStatus]): Option[StructType] = { + OrcUtils.readSchema(sparkSession, files) + } + + override def prepareWrite( + sparkSession: SparkSession, + job: Job, + options: Map[String, String], + dataSchema: StructType): OutputWriterFactory = { + val orcOptions = new OrcOptions(options, sparkSession.sessionState.conf) + + val conf = job.getConfiguration + + conf.set(MAPRED_OUTPUT_SCHEMA.getAttribute, dataSchema.catalogString) + + conf.set(COMPRESS.getAttribute, orcOptions.compressionCodec) + + conf.asInstanceOf[JobConf] + .setOutputFormat(classOf[org.apache.orc.mapred.OrcOutputFormat[OrcStruct]]) + + new OutputWriterFactory { + override def newInstance( + path: String, + dataSchema: StructType, + context: TaskAttemptContext): OutputWriter = { + new OrcOutputWriter(path, dataSchema, context) + } + + override def getFileExtension(context: TaskAttemptContext): String = { + val compressionExtension: String = { + val name = context.getConfiguration.get(COMPRESS.getAttribute) + OrcUtils.extensionsForCompressionCodecNames.getOrElse(name, "") + } + + compressionExtension + ".orc" + } + } + } + + override def supportBatch(sparkSession: SparkSession, schema: StructType): Boolean = { + val conf = sparkSession.sessionState.conf + conf.orcVectorizedReaderEnabled && conf.wholeStageEnabled && + schema.length <= conf.wholeStageMaxNumFields && + schema.forall(_.dataType.isInstanceOf[AtomicType]) + } + + override def isSplitable( + sparkSession: SparkSession, + options: Map[String, String], + path: Path): Boolean = { + true + } + + override def buildReaderWithPartitionValues( + sparkSession: SparkSession, + dataSchema: StructType, + partitionSchema: StructType, + requiredSchema: StructType, + filters: Seq[Filter], + options: Map[String, String], + hadoopConf: Configuration): (PartitionedFile) => Iterator[InternalRow] = { + if (sparkSession.sessionState.conf.orcFilterPushDown) { + OrcFilters.createFilter(dataSchema, filters).foreach { f => + OrcInputFormat.setSearchArgument(hadoopConf, f, dataSchema.fieldNames) + } + } + + val resultSchema = StructType(requiredSchema.fields ++ partitionSchema.fields) + val sqlConf = sparkSession.sessionState.conf + val enableOffHeapColumnVector = sqlConf.offHeapColumnVectorEnabled + val enableVectorizedReader = supportBatch(sparkSession, resultSchema) + val copyToSpark = sparkSession.sessionState.conf.getConf(SQLConf.ORC_COPY_BATCH_TO_SPARK) + + val broadcastedConf = + 
sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf)) + val isCaseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis + + (file: PartitionedFile) => { + val conf = broadcastedConf.value.value + + val filePath = new Path(new URI(file.filePath)) + + val fs = filePath.getFileSystem(conf) + val readerOptions = OrcFile.readerOptions(conf).filesystem(fs) + val reader = OrcFile.createReader(filePath, readerOptions) + + val requestedColIdsOrEmptyFile = OrcUtils.requestedColumnIds( + isCaseSensitive, dataSchema, requiredSchema, reader, conf) + + if (requestedColIdsOrEmptyFile.isEmpty) { + Iterator.empty + } else { + val requestedColIds = requestedColIdsOrEmptyFile.get + assert(requestedColIds.length == requiredSchema.length, + "[BUG] requested column IDs do not match required schema") + val taskConf = new Configuration(conf) + taskConf.set(OrcConf.INCLUDE_COLUMNS.getAttribute, + requestedColIds.filter(_ != -1).sorted.mkString(",")) + + val fileSplit = new FileSplit(filePath, file.start, file.length, Array.empty) + val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0) + val taskAttemptContext = new TaskAttemptContextImpl(taskConf, attemptId) + + val taskContext = Option(TaskContext.get()) + if (enableVectorizedReader) { + val batchReader = new OrcColumnarBatchReader( + enableOffHeapColumnVector && taskContext.isDefined, copyToSpark) + batchReader.initialize(fileSplit, taskAttemptContext) + batchReader.initBatch( + reader.getSchema, + requestedColIds, + requiredSchema.fields, + partitionSchema, + file.partitionValues) + + val iter = new RecordReaderIterator(batchReader) + Option(TaskContext.get()).foreach(_.addTaskCompletionListener(_ => iter.close())) + iter.asInstanceOf[Iterator[InternalRow]] + } else { + val orcRecordReader = new OrcInputFormat[OrcStruct] + .createRecordReader(fileSplit, taskAttemptContext) + val iter = new RecordReaderIterator[OrcStruct](orcRecordReader) + Option(TaskContext.get()).foreach(_.addTaskCompletionListener(_ => iter.close())) + + val fullSchema = requiredSchema.toAttributes ++ partitionSchema.toAttributes + val unsafeProjection = GenerateUnsafeProjection.generate(fullSchema, fullSchema) + val deserializer = new OrcDeserializer(dataSchema, requiredSchema, requestedColIds) + + if (partitionSchema.length == 0) { + iter.map(value => unsafeProjection(deserializer.deserialize(value))) + } else { + val joinedRow = new JoinedRow() + iter.map(value => + unsafeProjection(joinedRow(deserializer.deserialize(value), file.partitionValues))) + } + } + } + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala new file mode 100644 index 0000000000000..4f44ae4fa1d71 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala @@ -0,0 +1,209 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.orc + +import org.apache.orc.storage.ql.io.sarg.{PredicateLeaf, SearchArgument, SearchArgumentFactory} +import org.apache.orc.storage.ql.io.sarg.SearchArgument.Builder +import org.apache.orc.storage.serde2.io.HiveDecimalWritable + +import org.apache.spark.sql.sources.Filter +import org.apache.spark.sql.types._ + +/** + * Helper object for building ORC `SearchArgument`s, which are used for ORC predicate push-down. + * + * Due to limitation of ORC `SearchArgument` builder, we had to end up with a pretty weird double- + * checking pattern when converting `And`/`Or`/`Not` filters. + * + * An ORC `SearchArgument` must be built in one pass using a single builder. For example, you can't + * build `a = 1` and `b = 2` first, and then combine them into `a = 1 AND b = 2`. This is quite + * different from the cases in Spark SQL or Parquet, where complex filters can be easily built using + * existing simpler ones. + * + * The annoying part is that, `SearchArgument` builder methods like `startAnd()`, `startOr()`, and + * `startNot()` mutate internal state of the builder instance. This forces us to translate all + * convertible filters with a single builder instance. However, before actually converting a filter, + * we've no idea whether it can be recognized by ORC or not. Thus, when an inconvertible filter is + * found, we may already end up with a builder whose internal state is inconsistent. + * + * For example, to convert an `And` filter with builder `b`, we call `b.startAnd()` first, and then + * try to convert its children. Say we convert `left` child successfully, but find that `right` + * child is inconvertible. Alas, `b.startAnd()` call can't be rolled back, and `b` is inconsistent + * now. + * + * The workaround employed here is that, for `And`/`Or`/`Not`, we first try to convert their + * children with brand new builders, and only do the actual conversion with the right builder + * instance when the children are proven to be convertible. + * + * P.S.: Hive seems to use `SearchArgument` together with `ExprNodeGenericFuncDesc` only. Usage of + * builder methods mentioned above can only be found in test code, where all tested filters are + * known to be convertible. + */ +private[orc] object OrcFilters { + + /** + * Create ORC filter as a SearchArgument instance. + */ + def createFilter(schema: StructType, filters: Seq[Filter]): Option[SearchArgument] = { + val dataTypeMap = schema.map(f => f.name -> f.dataType).toMap + + // First, tries to convert each filter individually to see whether it's convertible, and then + // collect all convertible ones to build the final `SearchArgument`. 
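
The scaladoc above describes the probe-then-build workaround in the abstract. Stripped of ORC specifics, the pattern looks like the following sketch, with a toy, irreversible builder standing in for ORC's `SearchArgument.Builder` (all names here are illustrative):

final class OneShotBuilder {
  private val parts = scala.collection.mutable.Buffer[String]()
  def startAnd(): OneShotBuilder = { parts += "AND("; this } // mutates, cannot be rolled back
  def leaf(p: String): OneShotBuilder = { parts += p; this }
  def end(): String = { parts += ")"; parts.mkString(" ") }
}

object ProbeThenBuild {
  // Returns None when a predicate is not convertible.
  def convert(pred: String, b: OneShotBuilder): Option[OneShotBuilder] =
    if (pred.startsWith("unsupported")) None else Some(b.leaf(pred))

  def convertAnd(left: String, right: String, builder: OneShotBuilder): Option[String] =
    for {
      _ <- convert(left, new OneShotBuilder)  // probe children against throwaway builders
      _ <- convert(right, new OneShotBuilder) // so the real builder is never left half-built
      b1 <- convert(left, builder.startAnd()) // only now mutate the real builder
      b2 <- convert(right, b1)
    } yield b2.end()

  def main(args: Array[String]): Unit = {
    assert(convertAnd("a = 1", "b = 2", new OneShotBuilder).contains("AND( a = 1 b = 2 )"))
    assert(convertAnd("a = 1", "unsupported: b", new OneShotBuilder).isEmpty)
  }
}
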
+    val convertibleFilters = for {
+      filter <- filters
+      _ <- buildSearchArgument(dataTypeMap, filter, SearchArgumentFactory.newBuilder())
+    } yield filter
+
+    for {
+      // Combines all convertible filters using `And` to produce a single conjunction
+      conjunction <- convertibleFilters.reduceOption(org.apache.spark.sql.sources.And)
+      // Then tries to build a single ORC `SearchArgument` for the conjunction predicate
+      builder <- buildSearchArgument(dataTypeMap, conjunction, SearchArgumentFactory.newBuilder())
+    } yield builder.build()
+  }
+
+  /**
+   * Return true if this is a searchable type in ORC.
+   * Both CharType and VarcharType are cleaned at AstBuilder.
+   */
+  private def isSearchableType(dataType: DataType) = dataType match {
+    case BinaryType => false
+    case _: AtomicType => true
+    case _ => false
+  }
+
+  /**
+   * Get the PredicateLeaf.Type corresponding to the given DataType.
+   */
+  private def getPredicateLeafType(dataType: DataType) = dataType match {
+    case BooleanType => PredicateLeaf.Type.BOOLEAN
+    case ByteType | ShortType | IntegerType | LongType => PredicateLeaf.Type.LONG
+    case FloatType | DoubleType => PredicateLeaf.Type.FLOAT
+    case StringType => PredicateLeaf.Type.STRING
+    case DateType => PredicateLeaf.Type.DATE
+    case TimestampType => PredicateLeaf.Type.TIMESTAMP
+    case _: DecimalType => PredicateLeaf.Type.DECIMAL
+    case _ => throw new UnsupportedOperationException(s"DataType: $dataType")
+  }
+
+  /**
+   * Cast literal values for filters.
+   *
+   * We need to cast the literal values (e.g. integral types to long) because ORC raises
+   * exceptions at 'checkLiteralType' of SearchArgumentImpl.java otherwise.
+   */
+  private def castLiteralValue(value: Any, dataType: DataType): Any = dataType match {
+    case ByteType | ShortType | IntegerType | LongType =>
+      value.asInstanceOf[Number].longValue
+    case FloatType | DoubleType =>
+      value.asInstanceOf[Number].doubleValue()
+    case _: DecimalType =>
+      val decimal = value.asInstanceOf[java.math.BigDecimal]
+      val decimalWritable = new HiveDecimalWritable(decimal.longValue)
+      decimalWritable.mutateEnforcePrecisionScale(decimal.precision, decimal.scale)
+      decimalWritable
+    case _ => value
+  }
+
+  /**
+   * Build a SearchArgument and return the builder so far.
+   */
+  private def buildSearchArgument(
+      dataTypeMap: Map[String, DataType],
+      expression: Filter,
+      builder: Builder): Option[Builder] = {
+    def newBuilder = SearchArgumentFactory.newBuilder()
+
+    def getType(attribute: String): PredicateLeaf.Type =
+      getPredicateLeafType(dataTypeMap(attribute))
+
+    import org.apache.spark.sql.sources._
+
+    expression match {
+      case And(left, right) =>
+        // Here, it is not safe to just convert one side if we do not understand the
+        // other side. An example explains why: say we have NOT(a = 2 AND b in ('1')) and
+        // we do not understand how to convert b in ('1'). If we only convert a = 2, we end
+        // up with the filter NOT(a = 2), which will generate wrong results.
+        // Pushing one side of AND down is only safe to do at the top level.
+        // You can see ParquetRelation's initializeLocalJobFunc method as an example.
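
A concrete illustration of the comment above, as a plain-Scala truth check over three hypothetical rows (the data is made up for the demonstration):

object PartialAndIsUnsafe {
  final case class Row(a: Int, b: String)
  val rows = Seq(Row(2, "1"), Row(2, "2"), Row(3, "1"))

  def main(args: Array[String]): Unit = {
    // Correct filter: NOT(a = 2 AND b IN ('1')) keeps (2, "2") and (3, "1").
    val full = rows.filterNot(r => r.a == 2 && r.b == "1")
    // Partially converted filter: NOT(a = 2) wrongly drops (2, "2") as well.
    val partial = rows.filterNot(r => r.a == 2)
    assert(full == Seq(Row(2, "2"), Row(3, "1")))
    assert(partial == Seq(Row(3, "1")))
  }
}
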
+ for { + _ <- buildSearchArgument(dataTypeMap, left, newBuilder) + _ <- buildSearchArgument(dataTypeMap, right, newBuilder) + lhs <- buildSearchArgument(dataTypeMap, left, builder.startAnd()) + rhs <- buildSearchArgument(dataTypeMap, right, lhs) + } yield rhs.end() + + case Or(left, right) => + for { + _ <- buildSearchArgument(dataTypeMap, left, newBuilder) + _ <- buildSearchArgument(dataTypeMap, right, newBuilder) + lhs <- buildSearchArgument(dataTypeMap, left, builder.startOr()) + rhs <- buildSearchArgument(dataTypeMap, right, lhs) + } yield rhs.end() + + case Not(child) => + for { + _ <- buildSearchArgument(dataTypeMap, child, newBuilder) + negate <- buildSearchArgument(dataTypeMap, child, builder.startNot()) + } yield negate.end() + + // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()` + // call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be + // wrapped by a "parent" predicate (`And`, `Or`, or `Not`). + + case EqualTo(attribute, value) if isSearchableType(dataTypeMap(attribute)) => + val castedValue = castLiteralValue(value, dataTypeMap(attribute)) + Some(builder.startAnd().equals(attribute, getType(attribute), castedValue).end()) + + case EqualNullSafe(attribute, value) if isSearchableType(dataTypeMap(attribute)) => + val castedValue = castLiteralValue(value, dataTypeMap(attribute)) + Some(builder.startAnd().nullSafeEquals(attribute, getType(attribute), castedValue).end()) + + case LessThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) => + val castedValue = castLiteralValue(value, dataTypeMap(attribute)) + Some(builder.startAnd().lessThan(attribute, getType(attribute), castedValue).end()) + + case LessThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) => + val castedValue = castLiteralValue(value, dataTypeMap(attribute)) + Some(builder.startAnd().lessThanEquals(attribute, getType(attribute), castedValue).end()) + + case GreaterThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) => + val castedValue = castLiteralValue(value, dataTypeMap(attribute)) + Some(builder.startNot().lessThanEquals(attribute, getType(attribute), castedValue).end()) + + case GreaterThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) => + val castedValue = castLiteralValue(value, dataTypeMap(attribute)) + Some(builder.startNot().lessThan(attribute, getType(attribute), castedValue).end()) + + case IsNull(attribute) if isSearchableType(dataTypeMap(attribute)) => + Some(builder.startAnd().isNull(attribute, getType(attribute)).end()) + + case IsNotNull(attribute) if isSearchableType(dataTypeMap(attribute)) => + Some(builder.startNot().isNull(attribute, getType(attribute)).end()) + + case In(attribute, values) if isSearchableType(dataTypeMap(attribute)) => + val castedValues = values.map(v => castLiteralValue(v, dataTypeMap(attribute))) + Some(builder.startAnd().in(attribute, getType(attribute), + castedValues.map(_.asInstanceOf[AnyRef]): _*).end()) + + case _ => None + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOutputWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOutputWriter.scala new file mode 100644 index 0000000000000..84755bfa301f0 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOutputWriter.scala @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.orc + +import org.apache.hadoop.fs.Path +import org.apache.hadoop.io.NullWritable +import org.apache.hadoop.mapreduce.TaskAttemptContext +import org.apache.orc.mapred.OrcStruct +import org.apache.orc.mapreduce.OrcOutputFormat + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.datasources.OutputWriter +import org.apache.spark.sql.types._ + +private[orc] class OrcOutputWriter( + path: String, + dataSchema: StructType, + context: TaskAttemptContext) + extends OutputWriter { + + private[this] val serializer = new OrcSerializer(dataSchema) + + private val recordWriter = { + new OrcOutputFormat[OrcStruct]() { + override def getDefaultWorkFile(context: TaskAttemptContext, extension: String): Path = { + new Path(path) + } + }.getRecordWriter(context) + } + + override def write(row: InternalRow): Unit = { + recordWriter.write(NullWritable.get(), serializer.serialize(row)) + } + + override def close(): Unit = { + recordWriter.close(context) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcSerializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcSerializer.scala new file mode 100644 index 0000000000000..899af0750cadf --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcSerializer.scala @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.orc + +import org.apache.hadoop.io._ +import org.apache.orc.TypeDescription +import org.apache.orc.mapred.{OrcList, OrcMap, OrcStruct, OrcTimestamp} +import org.apache.orc.storage.common.`type`.HiveDecimal +import org.apache.orc.storage.serde2.io.{DateWritable, HiveDecimalWritable} + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.SpecializedGetters +import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.types._ + +/** + * A serializer to serialize Spark rows to ORC structs. + */ +class OrcSerializer(dataSchema: StructType) { + + private val result = createOrcValue(dataSchema).asInstanceOf[OrcStruct] + private val converters = dataSchema.map(_.dataType).map(newConverter(_)).toArray + + def serialize(row: InternalRow): OrcStruct = { + var i = 0 + while (i < converters.length) { + if (row.isNullAt(i)) { + result.setFieldValue(i, null) + } else { + result.setFieldValue(i, converters(i)(row, i)) + } + i += 1 + } + result + } + + private type Converter = (SpecializedGetters, Int) => WritableComparable[_] + + /** + * Creates a converter to convert Catalyst data at the given ordinal to ORC values. + */ + private def newConverter( + dataType: DataType, + reuseObj: Boolean = true): Converter = dataType match { + case NullType => (getter, ordinal) => null + + case BooleanType => + if (reuseObj) { + val result = new BooleanWritable() + (getter, ordinal) => + result.set(getter.getBoolean(ordinal)) + result + } else { + (getter, ordinal) => new BooleanWritable(getter.getBoolean(ordinal)) + } + + case ByteType => + if (reuseObj) { + val result = new ByteWritable() + (getter, ordinal) => + result.set(getter.getByte(ordinal)) + result + } else { + (getter, ordinal) => new ByteWritable(getter.getByte(ordinal)) + } + + case ShortType => + if (reuseObj) { + val result = new ShortWritable() + (getter, ordinal) => + result.set(getter.getShort(ordinal)) + result + } else { + (getter, ordinal) => new ShortWritable(getter.getShort(ordinal)) + } + + case IntegerType => + if (reuseObj) { + val result = new IntWritable() + (getter, ordinal) => + result.set(getter.getInt(ordinal)) + result + } else { + (getter, ordinal) => new IntWritable(getter.getInt(ordinal)) + } + + + case LongType => + if (reuseObj) { + val result = new LongWritable() + (getter, ordinal) => + result.set(getter.getLong(ordinal)) + result + } else { + (getter, ordinal) => new LongWritable(getter.getLong(ordinal)) + } + + case FloatType => + if (reuseObj) { + val result = new FloatWritable() + (getter, ordinal) => + result.set(getter.getFloat(ordinal)) + result + } else { + (getter, ordinal) => new FloatWritable(getter.getFloat(ordinal)) + } + + case DoubleType => + if (reuseObj) { + val result = new DoubleWritable() + (getter, ordinal) => + result.set(getter.getDouble(ordinal)) + result + } else { + (getter, ordinal) => new DoubleWritable(getter.getDouble(ordinal)) + } + + + // Don't reuse the result object for string and binary as it would cause extra data copy. 
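
The comment above is the crux of the `reuseObj` flag threaded through `newConverter`. Reduced to a single mutable holder type (a stand-in for Hadoop's reusable `Writable`s; all names here are illustrative), the trade-off looks like this:

final class MutableCell(var value: Long) // stand-in for a reusable Writable

object ReusePattern {
  type Converter = Long => MutableCell

  def newConverter(reuseObj: Boolean): Converter =
    if (reuseObj) {
      val cell = new MutableCell(0L) // one shared instance, overwritten on every call
      v => { cell.value = v; cell }
    } else {
      v => new MutableCell(v) // fresh instance per call, needed when results are retained
    }

  def main(args: Array[String]): Unit = {
    val reusing = newConverter(reuseObj = true)
    assert(reusing(1) eq reusing(2)) // the same object comes back, now holding 2

    val fresh = newConverter(reuseObj = false)
    assert(Seq(fresh(1), fresh(2)).map(_.value) == Seq(1L, 2L)) // values survive retention
  }
}
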
+ case StringType => (getter, ordinal) => + new Text(getter.getUTF8String(ordinal).getBytes) + + case BinaryType => (getter, ordinal) => + new BytesWritable(getter.getBinary(ordinal)) + + case DateType => + if (reuseObj) { + val result = new DateWritable() + (getter, ordinal) => + result.set(getter.getInt(ordinal)) + result + } else { + (getter, ordinal) => new DateWritable(getter.getInt(ordinal)) + } + + // The following cases are already expensive, reusing object or not doesn't matter. + + case TimestampType => (getter, ordinal) => + val ts = DateTimeUtils.toJavaTimestamp(getter.getLong(ordinal)) + val result = new OrcTimestamp(ts.getTime) + result.setNanos(ts.getNanos) + result + + case DecimalType.Fixed(precision, scale) => (getter, ordinal) => + val d = getter.getDecimal(ordinal, precision, scale) + new HiveDecimalWritable(HiveDecimal.create(d.toJavaBigDecimal)) + + case st: StructType => (getter, ordinal) => + val result = createOrcValue(st).asInstanceOf[OrcStruct] + val fieldConverters = st.map(_.dataType).map(newConverter(_)) + val numFields = st.length + val struct = getter.getStruct(ordinal, numFields) + var i = 0 + while (i < numFields) { + if (struct.isNullAt(i)) { + result.setFieldValue(i, null) + } else { + result.setFieldValue(i, fieldConverters(i)(struct, i)) + } + i += 1 + } + result + + case ArrayType(elementType, _) => (getter, ordinal) => + val result = createOrcValue(dataType).asInstanceOf[OrcList[WritableComparable[_]]] + // Need to put all converted values to a list, can't reuse object. + val elementConverter = newConverter(elementType, reuseObj = false) + val array = getter.getArray(ordinal) + var i = 0 + while (i < array.numElements()) { + if (array.isNullAt(i)) { + result.add(null) + } else { + result.add(elementConverter(array, i)) + } + i += 1 + } + result + + case MapType(keyType, valueType, _) => (getter, ordinal) => + val result = createOrcValue(dataType) + .asInstanceOf[OrcMap[WritableComparable[_], WritableComparable[_]]] + // Need to put all converted values to a list, can't reuse object. + val keyConverter = newConverter(keyType, reuseObj = false) + val valueConverter = newConverter(valueType, reuseObj = false) + val map = getter.getMap(ordinal) + val keyArray = map.keyArray() + val valueArray = map.valueArray() + var i = 0 + while (i < map.numElements()) { + val key = keyConverter(keyArray, i) + if (valueArray.isNullAt(i)) { + result.put(key, null) + } else { + result.put(key, valueConverter(valueArray, i)) + } + i += 1 + } + result + + case udt: UserDefinedType[_] => newConverter(udt.sqlType) + + case _ => + throw new UnsupportedOperationException(s"$dataType is not supported yet.") + } + + /** + * Return a Orc value object for the given Spark schema. + */ + private def createOrcValue(dataType: DataType) = { + OrcStruct.createValue(TypeDescription.fromString(dataType.catalogString)) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala new file mode 100644 index 0000000000000..460194ba61c8b --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.orc + +import scala.collection.JavaConverters._ + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.orc.{OrcFile, Reader, TypeDescription} + +import org.apache.spark.SparkException +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.internal.Logging +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.analysis.{caseInsensitiveResolution, caseSensitiveResolution} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser +import org.apache.spark.sql.types._ + +object OrcUtils extends Logging { + + // The extensions for ORC compression codecs + val extensionsForCompressionCodecNames = Map( + "NONE" -> "", + "SNAPPY" -> ".snappy", + "ZLIB" -> ".zlib", + "LZO" -> ".lzo") + + def listOrcFiles(pathStr: String, conf: Configuration): Seq[Path] = { + val origPath = new Path(pathStr) + val fs = origPath.getFileSystem(conf) + val paths = SparkHadoopUtil.get.listLeafStatuses(fs, origPath) + .filterNot(_.isDirectory) + .map(_.getPath) + .filterNot(_.getName.startsWith("_")) + .filterNot(_.getName.startsWith(".")) + paths + } + + def readSchema(file: Path, conf: Configuration, ignoreCorruptFiles: Boolean) + : Option[TypeDescription] = { + val fs = file.getFileSystem(conf) + val readerOptions = OrcFile.readerOptions(conf).filesystem(fs) + try { + val reader = OrcFile.createReader(file, readerOptions) + val schema = reader.getSchema + if (schema.getFieldNames.size == 0) { + None + } else { + Some(schema) + } + } catch { + case e: org.apache.orc.FileFormatException => + if (ignoreCorruptFiles) { + logWarning(s"Skipped the footer in the corrupted file: $file", e) + None + } else { + throw new SparkException(s"Could not read footer for file: $file", e) + } + } + } + + def readSchema(sparkSession: SparkSession, files: Seq[FileStatus]) + : Option[StructType] = { + val ignoreCorruptFiles = sparkSession.sessionState.conf.ignoreCorruptFiles + val conf = sparkSession.sessionState.newHadoopConf() + // TODO: We need to support merge schema. Please see SPARK-11412. + files.map(_.getPath).flatMap(readSchema(_, conf, ignoreCorruptFiles)).headOption.map { schema => + logDebug(s"Reading schema from file $files, got Hive schema string: $schema") + CatalystSqlParser.parseDataType(schema.toString).asInstanceOf[StructType] + } + } + + /** + * Returns the requested column ids from the given ORC file. Column id can be -1, which means the + * requested column doesn't exist in the ORC file. Returns None if the given ORC file is empty. 
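+   *
+   * For example, when the file stores real field names `[a, c]` and the required schema is
+   * `<a, b>`, the result is `[0, -1]`: `a` resolves to physical column 0 and `b` is missing
+   * from the file.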
+ */
+  def requestedColumnIds(
+      isCaseSensitive: Boolean,
+      dataSchema: StructType,
+      requiredSchema: StructType,
+      reader: Reader,
+      conf: Configuration): Option[Array[Int]] = {
+    val orcFieldNames = reader.getSchema.getFieldNames.asScala
+    if (orcFieldNames.isEmpty) {
+      // SPARK-8501: Some old empty ORC files always have an empty schema stored in their footer.
+      None
+    } else {
+      if (orcFieldNames.forall(_.startsWith("_col"))) {
+        // This is an ORC file written by Hive: there are no field names in the physical schema,
+        // so assume the physical schema maps to the data schema by index.
+        assert(orcFieldNames.length <= dataSchema.length, "The given data schema " +
+          s"${dataSchema.simpleString} has fewer fields than the actual ORC physical schema; " +
+          "we cannot tell which columns were dropped, so the read fails.")
+        Some(requiredSchema.fieldNames.map { name =>
+          val index = dataSchema.fieldIndex(name)
+          if (index < orcFieldNames.length) {
+            index
+          } else {
+            -1
+          }
+        })
+      } else {
+        val resolver = if (isCaseSensitive) caseSensitiveResolution else caseInsensitiveResolution
+        Some(requiredSchema.fieldNames.map { name => orcFieldNames.indexWhere(resolver(_, name)) })
+      }
+    }
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
index b1d49af3de465..8e2bc1033c948 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
@@ -49,6 +49,7 @@ import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
 import org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser
+import org.apache.spark.sql.catalyst.util.DateTimeUtils
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.execution.vectorized.{OffHeapColumnVector, OnHeapColumnVector}
 import org.apache.spark.sql.internal.SQLConf
@@ -377,6 +378,9 @@ class ParquetFileFormat
     hadoopConf.set(
       ParquetWriteSupport.SPARK_ROW_SCHEMA,
       requiredSchema.json)
+    hadoopConf.set(
+      SQLConf.SESSION_LOCAL_TIMEZONE.key,
+      sparkSession.sessionState.conf.sessionLocalTimeZone)

     ParquetWriteSupport.setSchema(requiredSchema, hadoopConf)

@@ -424,6 +428,8 @@ class ParquetFileFormat
       resultSchema.forall(_.dataType.isInstanceOf[AtomicType])

     val enableRecordFilter: Boolean =
       sparkSession.sessionState.conf.parquetRecordFilterEnabled
+    val timestampConversion: Boolean =
+      sparkSession.sessionState.conf.isParquetINT96TimestampConversion
     // Whole stage codegen (PhysicalRDD) is able to deal with batches directly
     val returningBatch = supportBatch(sparkSession, resultSchema)

@@ -442,6 +448,22 @@ class ParquetFileFormat
         fileSplit.getLocations,
         null)

+      val sharedConf = broadcastedHadoopConf.value.value
+      // PARQUET_INT96_TIMESTAMP_CONVERSION says to apply timezone conversions to int96 timestamps
+      // *only* if the file was created by something other than "parquet-mr", so check the actual
+      // writer here for this file. We have to do this per-file, as each file in the table may
+      // have different writers.
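As an aside on `requestedColumnIds` above: a minimal sketch of its two matching strategies, with column names as plain strings standing in for the real Catalyst types.

```scala
// Hive-written ORC files only carry positional names (_col0, _col1, ...), so
// required columns are matched by position in the data schema; otherwise they
// are matched by (possibly case-insensitive) name. -1 marks a missing column.
val dataSchema = Seq("id", "name", "score")  // hypothetical table schema
val orcFieldNames = Seq("_col0", "_col1")    // file written before "score" was added
val requiredColumns = Seq("name", "score")

val ids = requiredColumns.map { col =>
  val index = dataSchema.indexOf(col)
  if (index >= 0 && index < orcFieldNames.length) index else -1
}
assert(ids == Seq(1, -1))  // "score" does not exist in this particular file
```

The hunk resumes below with the per-file writer check that the preceding comment describes.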
+ def isCreatedByParquetMr(): Boolean = { + val footer = ParquetFileReader.readFooter(sharedConf, fileSplit.getPath, SKIP_ROW_GROUPS) + footer.getFileMetaData().getCreatedBy().startsWith("parquet-mr") + } + val convertTz = + if (timestampConversion && !isCreatedByParquetMr()) { + Some(DateTimeUtils.getTimeZone(sharedConf.get(SQLConf.SESSION_LOCAL_TIMEZONE.key))) + } else { + None + } + val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0) val hadoopAttemptContext = new TaskAttemptContextImpl(broadcastedHadoopConf.value.value, attemptId) @@ -453,8 +475,8 @@ class ParquetFileFormat } val taskContext = Option(TaskContext.get()) val parquetReader = if (enableVectorizedReader) { - val vectorizedReader = - new VectorizedParquetRecordReader(enableOffHeapColumnVector && taskContext.isDefined) + val vectorizedReader = new VectorizedParquetRecordReader( + convertTz.orNull, enableOffHeapColumnVector && taskContext.isDefined) vectorizedReader.initialize(split, hadoopAttemptContext) logDebug(s"Appending $partitionSchema ${file.partitionValues}") vectorizedReader.initBatch(partitionSchema, file.partitionValues) @@ -467,9 +489,9 @@ class ParquetFileFormat // ParquetRecordReader returns UnsafeRow val reader = if (pushed.isDefined && enableRecordFilter) { val parquetFilter = FilterCompat.get(pushed.get, null) - new ParquetRecordReader[UnsafeRow](new ParquetReadSupport, parquetFilter) + new ParquetRecordReader[UnsafeRow](new ParquetReadSupport(convertTz), parquetFilter) } else { - new ParquetRecordReader[UnsafeRow](new ParquetReadSupport) + new ParquetRecordReader[UnsafeRow](new ParquetReadSupport(convertTz)) } reader.initialize(split, hadoopAttemptContext) reader diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala index 772d4565de548..ef67ea7d17cea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.datasources.parquet import java.util.Locale +import org.apache.parquet.hadoop.ParquetOutputFormat import org.apache.parquet.hadoop.metadata.CompressionCodecName import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap @@ -42,8 +43,15 @@ private[parquet] class ParquetOptions( * Acceptable values are defined in [[shortParquetCompressionCodecNames]]. */ val compressionCodecClassName: String = { - val codecName = parameters.getOrElse("compression", - sqlConf.parquetCompressionCodec).toLowerCase(Locale.ROOT) + // `compression`, `parquet.compression`(i.e., ParquetOutputFormat.COMPRESSION), and + // `spark.sql.parquet.compression.codec` + // are in order of precedence from highest to lowest. 
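A rough sketch of the decision that `isCreatedByParquetMr` above feeds, with the footer's `createdBy` string and the session timezone stubbed out as plain values:

```scala
import java.util.TimeZone

val createdBy = "impala version 2.9.0"        // hypothetical footer metadata
val sessionLocalTimeZone = "America/Los_Angeles"
val timestampConversionEnabled = true         // the new SQLConf flag

// Convert INT96 values only for files written by something other than parquet-mr.
val convertTz: Option[TimeZone] =
  if (timestampConversionEnabled && !createdBy.startsWith("parquet-mr")) {
    Some(TimeZone.getTimeZone(sessionLocalTimeZone))
  } else {
    None
  }
```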
+ val parquetCompressionConf = parameters.get(ParquetOutputFormat.COMPRESSION) + val codecName = parameters + .get("compression") + .orElse(parquetCompressionConf) + .getOrElse(sqlConf.parquetCompressionCodec) + .toLowerCase(Locale.ROOT) if (!shortParquetCompressionCodecNames.contains(codecName)) { val availableCodecs = shortParquetCompressionCodecNames.keys.map(_.toLowerCase(Locale.ROOT)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala index 2854cb1bc0c25..40ce5d5e0564e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.datasources.parquet -import java.util.{Map => JMap} +import java.util.{Map => JMap, TimeZone} import scala.collection.JavaConverters._ @@ -48,9 +48,17 @@ import org.apache.spark.sql.types._ * Due to this reason, we no longer rely on [[ReadContext]] to pass requested schema from [[init()]] * to [[prepareForRead()]], but use a private `var` for simplicity. */ -private[parquet] class ParquetReadSupport extends ReadSupport[UnsafeRow] with Logging { +private[parquet] class ParquetReadSupport(val convertTz: Option[TimeZone]) + extends ReadSupport[UnsafeRow] with Logging { private var catalystRequestedSchema: StructType = _ + def this() { + // We need a zero-arg constructor for SpecificParquetRecordReaderBase. But that is only + // used in the vectorized reader, where we get the convertTz value directly, and the value here + // is ignored. + this(None) + } + /** * Called on executor side before [[prepareForRead()]] and instantiating actual Parquet record * readers. Responsible for figuring out Parquet requested schema used for column pruning. 
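The `ParquetOptions` change above resolves the codec through an `Option` chain; reduced to a self-contained sketch with hypothetical values:

```scala
import java.util.Locale

// Highest to lowest precedence: the "compression" data source option, the
// parquet.compression Hadoop key, then the SQL conf default.
val parameters = Map("parquet.compression" -> "GZIP")
val sqlConfDefault = "snappy"  // stand-in for spark.sql.parquet.compression.codec

val codecName = parameters.get("compression")
  .orElse(parameters.get("parquet.compression"))
  .getOrElse(sqlConfDefault)
  .toLowerCase(Locale.ROOT)

assert(codecName == "gzip")  // the Hadoop key wins when no "compression" option is set
```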
@@ -95,7 +103,8 @@ private[parquet] class ParquetReadSupport extends ReadSupport[UnsafeRow] with Lo
     new ParquetRecordMaterializer(
       parquetRequestedSchema,
       ParquetReadSupport.expandUDT(catalystRequestedSchema),
-      new ParquetToSparkSchemaConverter(conf))
+      new ParquetToSparkSchemaConverter(conf),
+      convertTz)
   }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala
index 793755e9aaeb5..b2459dd0e8bba 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala
@@ -17,6 +17,8 @@
 package org.apache.spark.sql.execution.datasources.parquet

+import java.util.TimeZone
+
 import org.apache.parquet.io.api.{GroupConverter, RecordMaterializer}
 import org.apache.parquet.schema.MessageType

@@ -33,11 +35,12 @@ import org.apache.spark.sql.types.StructType
 private[parquet] class ParquetRecordMaterializer(
     parquetSchema: MessageType,
     catalystSchema: StructType,
-    schemaConverter: ParquetToSparkSchemaConverter)
+    schemaConverter: ParquetToSparkSchemaConverter,
+    convertTz: Option[TimeZone])
   extends RecordMaterializer[UnsafeRow] {

   private val rootConverter =
-    new ParquetRowConverter(schemaConverter, parquetSchema, catalystSchema, NoopUpdater)
+    new ParquetRowConverter(schemaConverter, parquetSchema, catalystSchema, convertTz, NoopUpdater)

   override def getCurrentRecord: UnsafeRow = rootConverter.currentRecord

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala
index 10f6c3b4f15e3..1199725941842 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.datasources.parquet

 import java.math.{BigDecimal, BigInteger}
 import java.nio.ByteOrder
+import java.util.TimeZone

 import scala.collection.JavaConverters._
 import scala.collection.mutable.ArrayBuffer
@@ -117,12 +118,14 @@ private[parquet] class ParquetPrimitiveConverter(val updater: ParentContainerUpd
  * @param parquetType Parquet schema of Parquet records
  * @param catalystType Spark SQL schema that corresponds to the Parquet record type. User-defined
  *                     types should have been expanded.
+ * @param convertTz the optional time zone to convert to for int96 data
  * @param updater An updater which propagates converted field values to the parent container
  */
 private[parquet] class ParquetRowConverter(
     schemaConverter: ParquetToSparkSchemaConverter,
     parquetType: GroupType,
     catalystType: StructType,
+    convertTz: Option[TimeZone],
     updater: ParentContainerUpdater)
   extends ParquetGroupConverter(updater) with Logging {

@@ -151,6 +154,8 @@ private[parquet] class ParquetRowConverter(
        |${catalystType.prettyJson}
      """.stripMargin)

+  private val UTC = DateTimeUtils.TimeZoneUTC
+
   /**
    * Updater used together with field converters within a [[ParquetRowConverter]]. It propagates
    * converted field values to the `ordinal`-th cell in `currentRow`.
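For context on the `ParquetRowConverter` hunk that follows: an INT96 timestamp is twelve little-endian bytes, nanoseconds-of-day followed by a Julian day. A sketch of the raw conversion to microseconds since the epoch, assuming the conventional epoch Julian day of 2440588; the new `convertTz` step then adjusts this raw value when the writer stored session-local times:

```scala
import java.nio.{ByteBuffer, ByteOrder}

val JULIAN_DAY_OF_EPOCH = 2440588L                // 1970-01-01 as a Julian day
val MICROS_PER_DAY = 24L * 60 * 60 * 1000 * 1000

// Mirrors DateTimeUtils.fromJulianDay: 8 bytes nanos-of-day, then 4 bytes Julian day.
def int96ToMicros(bytes: Array[Byte]): Long = {
  val buf = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN)
  val timeOfDayNanos = buf.getLong
  val julianDay = buf.getInt
  (julianDay - JULIAN_DAY_OF_EPOCH) * MICROS_PER_DAY + timeOfDayNanos / 1000
}
```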
@@ -279,7 +284,9 @@ private[parquet] class ParquetRowConverter( val buf = value.toByteBuffer.order(ByteOrder.LITTLE_ENDIAN) val timeOfDayNanos = buf.getLong val julianDay = buf.getInt - updater.setLong(DateTimeUtils.fromJulianDay(julianDay, timeOfDayNanos)) + val rawTime = DateTimeUtils.fromJulianDay(julianDay, timeOfDayNanos) + val adjTime = convertTz.map(DateTimeUtils.convertTz(rawTime, _, UTC)).getOrElse(rawTime) + updater.setLong(adjTime) } } @@ -309,7 +316,7 @@ private[parquet] class ParquetRowConverter( case t: StructType => new ParquetRowConverter( - schemaConverter, parquetType.asGroupType(), t, new ParentContainerUpdater { + schemaConverter, parquetType.asGroupType(), t, convertTz, new ParentContainerUpdater { override def set(value: Any): Unit = updater.set(value.asInstanceOf[InternalRow].copy()) }) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index 60c430bcfece2..f64e079539c4f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -39,7 +39,7 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] { sparkSession.sessionState.conf.runSQLonFile && u.tableIdentifier.database.isDefined } - def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { + def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case u: UnresolvedRelation if maybeSQLFile(u) => try { val dataSource = DataSource( @@ -108,8 +108,9 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi } // Check if the specified data source match the data source of the existing table. - val existingProvider = DataSource.lookupDataSource(existingTable.provider.get) - val specifiedProvider = DataSource.lookupDataSource(tableDesc.provider.get) + val conf = sparkSession.sessionState.conf + val existingProvider = DataSource.lookupDataSource(existingTable.provider.get, conf) + val specifiedProvider = DataSource.lookupDataSource(tableDesc.provider.get, conf) // TODO: Check that options from the resolved relation match the relation that we are // inserting into (i.e. using the same compression). 
if (existingProvider != specifiedProvider) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala index d0690445d7672..c661e9bd3b94c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala @@ -17,12 +17,16 @@ package org.apache.spark.sql.execution.datasources.text +import java.io.Closeable + import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.hadoop.io.Text import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} import org.apache.spark.TaskContext -import org.apache.spark.sql.{AnalysisException, Row, SparkSession} +import org.apache.spark.broadcast.Broadcast +import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.catalyst.expressions.codegen.{BufferHolder, UnsafeRowWriter} @@ -53,6 +57,14 @@ class TextFileFormat extends TextBasedFileFormat with DataSourceRegister { } } + override def isSplitable( + sparkSession: SparkSession, + options: Map[String, String], + path: Path): Boolean = { + val textOptions = new TextOptions(options) + super.isSplitable(sparkSession, options, path) && !textOptions.wholeText + } + override def inferSchema( sparkSession: SparkSession, options: Map[String, String], @@ -97,14 +109,26 @@ class TextFileFormat extends TextBasedFileFormat with DataSourceRegister { assert( requiredSchema.length <= 1, "Text data source only produces a single data column named \"value\".") - + val textOptions = new TextOptions(options) val broadcastedHadoopConf = sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf)) + readToUnsafeMem(broadcastedHadoopConf, requiredSchema, textOptions.wholeText) + } + + private def readToUnsafeMem( + conf: Broadcast[SerializableConfiguration], + requiredSchema: StructType, + wholeTextMode: Boolean): (PartitionedFile) => Iterator[UnsafeRow] = { + (file: PartitionedFile) => { - val reader = new HadoopFileLinesReader(file, broadcastedHadoopConf.value.value) + val confValue = conf.value.value + val reader = if (!wholeTextMode) { + new HadoopFileLinesReader(file, confValue) + } else { + new HadoopFileWholeTextReader(file, confValue) + } Option(TaskContext.get()).foreach(_.addTaskCompletionListener(_ => reader.close())) - if (requiredSchema.isEmpty) { val emptyUnsafeRow = new UnsafeRow(0) reader.map(_ => emptyUnsafeRow) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextOptions.scala index 49bd7382f9cf3..2a661561ab51e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextOptions.scala @@ -33,8 +33,15 @@ private[text] class TextOptions(@transient private val parameters: CaseInsensiti * Compression codec to use. */ val compressionCodec = parameters.get(COMPRESSION).map(CompressionCodecs.getCodecClassName) + + /** + * wholetext - If true, read a file as a single row and not split by "\n". 
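A usage sketch for the new option, assuming a local `SparkSession` and a hypothetical input path:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("wholetext").getOrCreate()

// Each file under the path becomes one row instead of one row per line; files
// read this way are no longer splittable (see the isSplitable override above).
val df = spark.read.option("wholetext", "true").text("/data/notes")
```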
+ */ + val wholeText = parameters.getOrElse(WHOLETEXT, "false").toBoolean + } private[text] object TextOptions { val COMPRESSION = "compression" + val WHOLETEXT = "wholetext" } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala index 7eb99a645001a..cba20dd902007 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala @@ -35,6 +35,16 @@ case class DataSourceV2Relation( } } +/** + * A specialization of DataSourceV2Relation with the streaming bit set to true. Otherwise identical + * to the non-streaming relation. + */ +class StreamingDataSourceV2Relation( + fullOutput: Seq[AttributeReference], + reader: DataSourceV2Reader) extends DataSourceV2Relation(fullOutput, reader) { + override def isStreaming: Boolean = true +} + object DataSourceV2Relation { def apply(reader: DataSourceV2Reader): DataSourceV2Relation = { new DataSourceV2Relation(reader.readSchema().toAttributes, reader) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala index 3f243dc44e043..49c506bc560cf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala @@ -26,7 +26,10 @@ import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.execution.LeafExecNode import org.apache.spark.sql.execution.metric.SQLMetrics +import org.apache.spark.sql.execution.streaming.StreamExecution +import org.apache.spark.sql.execution.streaming.continuous.{ContinuousDataSourceRDD, ContinuousExecution, EpochCoordinatorRef, SetReaderPartitions} import org.apache.spark.sql.sources.v2.reader._ +import org.apache.spark.sql.sources.v2.streaming.reader.ContinuousReader import org.apache.spark.sql.types.StructType /** @@ -52,10 +55,20 @@ case class DataSourceV2ScanExec( }.asJava } - val inputRDD = new DataSourceRDD(sparkContext, readTasks) - .asInstanceOf[RDD[InternalRow]] + val inputRDD = reader match { + case _: ContinuousReader => + EpochCoordinatorRef.get( + sparkContext.getLocalProperty(ContinuousExecution.RUN_ID_KEY), sparkContext.env) + .askSync[Unit](SetReaderPartitions(readTasks.size())) + + new ContinuousDataSourceRDD(sparkContext, sqlContext, readTasks) + + case _ => + new DataSourceRDD(sparkContext, readTasks) + } + val numOutputRows = longMetric("numOutputRows") - inputRDD.map { r => + inputRDD.asInstanceOf[RDD[InternalRow]].map { r => numOutputRows += 1 r } @@ -73,7 +86,7 @@ class RowToUnsafeRowReadTask(rowReadTask: ReadTask[Row], schema: StructType) } } -class RowToUnsafeDataReader(rowReader: DataReader[Row], encoder: ExpressionEncoder[Row]) +class RowToUnsafeDataReader(val rowReader: DataReader[Row], encoder: ExpressionEncoder[Row]) extends DataReader[UnsafeRow] { override def next: Boolean = rowReader.next diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala new file mode 100644 index 
0000000000000..5267f5f1580c3
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.v2
+
+import java.util.regex.Pattern
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.sources.v2.{DataSourceV2, SessionConfigSupport}
+
+private[sql] object DataSourceV2Utils extends Logging {
+
+  /**
+   * Helper method that extracts and transforms session configs into k/v pairs; the k/v pairs will
+   * be used to create data source options.
+   * Configs are extracted only when `ds` implements [[SessionConfigSupport]]; in that case we
+   * fetch the specified key prefix from `ds` and extract session configs whose keys start with
+   * `spark.datasource.$keyPrefix`. A session config `spark.datasource.$keyPrefix.xxx -> yyy` will
+   * be transformed into `xxx -> yyy`.
+   *
+   * @param ds a [[DataSourceV2]] object
+   * @param conf the session conf
+   * @return an immutable map that contains all the extracted and transformed k/v pairs.
+   */
+  def extractSessionConfigs(ds: DataSourceV2, conf: SQLConf): Map[String, String] = ds match {
+    case cs: SessionConfigSupport =>
+      val keyPrefix = cs.keyPrefix()
+      require(keyPrefix != null, "The data source config key prefix can't be null.")
+
+      val pattern = Pattern.compile(s"^spark\\.datasource\\.$keyPrefix\\.(.+)")
+
+      conf.getAllConfs.flatMap { case (key, value) =>
+        val m = pattern.matcher(key)
+        if (m.matches() && m.groupCount() > 0) {
+          Seq((m.group(1), value))
+        } else {
+          Seq.empty
+        }
+      }
+
+    case _ => Map.empty
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownOperatorsToDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownOperatorsToDataSource.scala
index 0c1708131ae46..df034adf1e7d6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownOperatorsToDataSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownOperatorsToDataSource.scala
@@ -40,12 +40,8 @@ object PushDownOperatorsToDataSource extends Rule[LogicalPlan] with PredicateHel
     // top-down, then we can simplify the logic here and only collect target operators.
     val filterPushed = plan transformUp {
       case FilterAndProject(fields, condition, r @ DataSourceV2Relation(_, reader)) =>
-        // Non-deterministic expressions are stateful and we must keep the input sequence unchanged
-        // to avoid changing the result. This means, we can't evaluate the filter conditions that
-        // are after the first non-deterministic condition ahead.
Here we only try to push down - // deterministic conditions that are before the first non-deterministic condition. - val (candidates, containingNonDeterministic) = - splitConjunctivePredicates(condition).span(_.deterministic) + val (candidates, nonDeterministic) = + splitConjunctivePredicates(condition).partition(_.deterministic) val stayUpFilters: Seq[Expression] = reader match { case r: SupportsPushDownCatalystFilters => @@ -74,7 +70,7 @@ object PushDownOperatorsToDataSource extends Rule[LogicalPlan] with PredicateHel case _ => candidates } - val filterCondition = (stayUpFilters ++ containingNonDeterministic).reduceLeftOption(And) + val filterCondition = (stayUpFilters ++ nonDeterministic).reduceLeftOption(And) val withFilter = filterCondition.map(Filter(_, r)).getOrElse(r) if (withFilter.output == fields) { withFilter diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala index b72d15ed15aed..f0bdf84bb7a84 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.datasources.v2 -import org.apache.spark.{SparkException, TaskContext} +import org.apache.spark.{SparkEnv, SparkException, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.sql.Row @@ -26,6 +26,9 @@ import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.streaming.StreamExecution +import org.apache.spark.sql.execution.streaming.continuous.{CommitPartitionEpoch, ContinuousExecution, EpochCoordinatorRef, SetWriterPartitions} +import org.apache.spark.sql.sources.v2.streaming.writer.ContinuousWriter import org.apache.spark.sql.sources.v2.writer._ import org.apache.spark.sql.types.StructType import org.apache.spark.util.Utils @@ -58,10 +61,22 @@ case class WriteToDataSourceV2Exec(writer: DataSourceV2Writer, query: SparkPlan) s"The input RDD has ${messages.length} partitions.") try { + val runTask = writer match { + case w: ContinuousWriter => + EpochCoordinatorRef.get( + sparkContext.getLocalProperty(ContinuousExecution.RUN_ID_KEY), sparkContext.env) + .askSync[Unit](SetWriterPartitions(rdd.getNumPartitions)) + + (context: TaskContext, iter: Iterator[InternalRow]) => + DataWritingSparkTask.runContinuous(writeTask, context, iter) + case _ => + (context: TaskContext, iter: Iterator[InternalRow]) => + DataWritingSparkTask.run(writeTask, context, iter) + } + sparkContext.runJob( rdd, - (context: TaskContext, iter: Iterator[InternalRow]) => - DataWritingSparkTask.run(writeTask, context, iter), + runTask, rdd.partitions.indices, (index, message: WriterCommitMessage) => messages(index) = message ) @@ -70,6 +85,8 @@ case class WriteToDataSourceV2Exec(writer: DataSourceV2Writer, query: SparkPlan) writer.commit(messages) logInfo(s"Data source writer $writer committed.") } catch { + case _: InterruptedException if writer.isInstanceOf[ContinuousWriter] => + // Interruption is how continuous queries are ended, so accept and ignore the exception. 
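Stepping back to `DataSourceV2Utils.extractSessionConfigs` added earlier: a small self-contained sketch of the key rewriting it performs, with a hypothetical `mysource` prefix and config values:

```scala
import java.util.regex.Pattern

val keyPrefix = "mysource"  // what a SessionConfigSupport source might return
val pattern = Pattern.compile(s"^spark\\.datasource\\.$keyPrefix\\.(.+)")

val sessionConfs = Map(
  "spark.datasource.mysource.url" -> "jdbc:h2:mem:test",
  "spark.sql.shuffle.partitions" -> "200")

// spark.datasource.mysource.url becomes url; unrelated keys are dropped.
val extracted = sessionConfs.flatMap { case (key, value) =>
  val m = pattern.matcher(key)
  if (m.matches()) Seq(m.group(1) -> value) else Seq.empty
}
assert(extracted == Map("url" -> "jdbc:h2:mem:test"))
```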
case cause: Throwable => logError(s"Data source writer $writer is aborting.") try { @@ -109,6 +126,44 @@ object DataWritingSparkTask extends Logging { logError(s"Writer for partition ${context.partitionId()} aborted.") }) } + + def runContinuous( + writeTask: DataWriterFactory[InternalRow], + context: TaskContext, + iter: Iterator[InternalRow]): WriterCommitMessage = { + val dataWriter = writeTask.createDataWriter(context.partitionId(), context.attemptNumber()) + val epochCoordinator = EpochCoordinatorRef.get( + context.getLocalProperty(ContinuousExecution.RUN_ID_KEY), + SparkEnv.get) + val currentMsg: WriterCommitMessage = null + var currentEpoch = context.getLocalProperty(ContinuousExecution.START_EPOCH_KEY).toLong + + do { + // write the data and commit this writer. + Utils.tryWithSafeFinallyAndFailureCallbacks(block = { + try { + iter.foreach(dataWriter.write) + logInfo(s"Writer for partition ${context.partitionId()} is committing.") + val msg = dataWriter.commit() + logInfo(s"Writer for partition ${context.partitionId()} committed.") + epochCoordinator.send( + CommitPartitionEpoch(context.partitionId(), currentEpoch, msg) + ) + currentEpoch += 1 + } catch { + case _: InterruptedException => + // Continuous shutdown always involves an interrupt. Just finish the task. + } + })(catchBlock = { + // If there is an error, abort this writer + logError(s"Writer for partition ${context.partitionId()} is aborting.") + dataWriter.abort() + logError(s"Writer for partition ${context.partitionId()} aborted.") + }) + } while (!context.isInterrupted()) + + currentMsg + } } class InternalRowDataWriterFactory( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala index 4e2ca37bc1a59..e3d28388c5470 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala @@ -17,10 +17,14 @@ package org.apache.spark.sql.execution.exchange +import scala.collection.mutable.ArrayBuffer + import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ +import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, ShuffledHashJoinExec, + SortMergeJoinExec} import org.apache.spark.sql.internal.SQLConf /** @@ -42,23 +46,6 @@ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] { if (minNumPostShufflePartitions > 0) Some(minNumPostShufflePartitions) else None } - /** - * Given a required distribution, returns a partitioning that satisfies that distribution. 
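The `runContinuous` task above reduces to a commit-per-epoch loop that only ends on interruption; a toy rendering with every Spark piece stubbed out:

```scala
// Stand-ins only: the real loop writes the iterator, commits the writer, reports
// the commit message to the epoch coordinator, and repeats until interrupted.
var currentEpoch = 0L
def commitEpoch(epoch: Long): Unit = println(s"partition 0 committed epoch $epoch")
def isInterrupted: Boolean = currentEpoch >= 3  // stand-in for context.isInterrupted()

do {
  commitEpoch(currentEpoch)
  currentEpoch += 1
} while (!isInterrupted)
```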
- * @param requiredDistribution The distribution that is required by the operator - * @param numPartitions Used when the distribution doesn't require a specific number of partitions - */ - private def createPartitioning( - requiredDistribution: Distribution, - numPartitions: Int): Partitioning = { - requiredDistribution match { - case AllTuples => SinglePartition - case ClusteredDistribution(clustering, desiredPartitions) => - HashPartitioning(clustering, desiredPartitions.getOrElse(numPartitions)) - case OrderedDistribution(ordering) => RangePartitioning(ordering, numPartitions) - case dist => sys.error(s"Do not know how to satisfy distribution $dist") - } - } - /** * Adds [[ExchangeCoordinator]] to [[ShuffleExchangeExec]]s if adaptive query execution is enabled * and partitioning schemes of these [[ShuffleExchangeExec]]s support [[ExchangeCoordinator]]. @@ -84,8 +71,9 @@ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] { // shuffle data when we have more than one children because data generated by // these children may not be partitioned in the same way. // Please see the comment in withCoordinator for more details. - val supportsDistribution = - requiredChildDistributions.forall(_.isInstanceOf[ClusteredDistribution]) + val supportsDistribution = requiredChildDistributions.forall { dist => + dist.isInstanceOf[ClusteredDistribution] || dist.isInstanceOf[HashClusteredDistribution] + } children.length > 1 && supportsDistribution } @@ -138,8 +126,7 @@ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] { // // It will be great to introduce a new Partitioning to represent the post-shuffle // partitions when one post-shuffle partition includes multiple pre-shuffle partitions. - val targetPartitioning = - createPartitioning(distribution, defaultNumPreShufflePartitions) + val targetPartitioning = distribution.createPartitioning(defaultNumPreShufflePartitions) assert(targetPartitioning.isInstanceOf[HashPartitioning]) ShuffleExchangeExec(targetPartitioning, child, Some(coordinator)) } @@ -158,71 +145,56 @@ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] { assert(requiredChildDistributions.length == children.length) assert(requiredChildOrderings.length == children.length) - // Ensure that the operator's children satisfy their output distribution requirements: + // Ensure that the operator's children satisfy their output distribution requirements. children = children.zip(requiredChildDistributions).map { case (child, distribution) if child.outputPartitioning.satisfies(distribution) => child case (child, BroadcastDistribution(mode)) => BroadcastExchangeExec(mode, child) case (child, distribution) => - ShuffleExchangeExec(createPartitioning(distribution, defaultNumPreShufflePartitions), child) + val numPartitions = distribution.requiredNumPartitions + .getOrElse(defaultNumPreShufflePartitions) + ShuffleExchangeExec(distribution.createPartitioning(numPartitions), child) } - // If the operator has multiple children and specifies child output distributions (e.g. join), - // then the children's output partitionings must be compatible: - def requireCompatiblePartitioning(distribution: Distribution): Boolean = distribution match { - case UnspecifiedDistribution => false - case BroadcastDistribution(_) => false + // Get the indexes of children which have specified distribution requirements and need to have + // same number of partitions. 
+ val childrenIndexes = requiredChildDistributions.zipWithIndex.filter { + case (UnspecifiedDistribution, _) => false + case (_: BroadcastDistribution, _) => false case _ => true - } - if (children.length > 1 - && requiredChildDistributions.exists(requireCompatiblePartitioning) - && !Partitioning.allCompatible(children.map(_.outputPartitioning))) { + }.map(_._2) + + val childrenNumPartitions = + childrenIndexes.map(children(_).outputPartitioning.numPartitions).toSet - // First check if the existing partitions of the children all match. This means they are - // partitioned by the same partitioning into the same number of partitions. In that case, - // don't try to make them match `defaultPartitions`, just use the existing partitioning. - val maxChildrenNumPartitions = children.map(_.outputPartitioning.numPartitions).max - val useExistingPartitioning = children.zip(requiredChildDistributions).forall { - case (child, distribution) => - child.outputPartitioning.guarantees( - createPartitioning(distribution, maxChildrenNumPartitions)) + if (childrenNumPartitions.size > 1) { + // Get the number of partitions which is explicitly required by the distributions. + val requiredNumPartitions = { + val numPartitionsSet = childrenIndexes.flatMap { + index => requiredChildDistributions(index).requiredNumPartitions + }.toSet + assert(numPartitionsSet.size <= 1, + s"$operator have incompatible requirements of the number of partitions for its children") + numPartitionsSet.headOption } - children = if (useExistingPartitioning) { - // We do not need to shuffle any child's output. - children - } else { - // We need to shuffle at least one child's output. - // Now, we will determine the number of partitions that will be used by created - // partitioning schemes. - val numPartitions = { - // Let's see if we need to shuffle all child's outputs when we use - // maxChildrenNumPartitions. - val shufflesAllChildren = children.zip(requiredChildDistributions).forall { - case (child, distribution) => - !child.outputPartitioning.guarantees( - createPartitioning(distribution, maxChildrenNumPartitions)) - } - // If we need to shuffle all children, we use defaultNumPreShufflePartitions as the - // number of partitions. Otherwise, we use maxChildrenNumPartitions. - if (shufflesAllChildren) defaultNumPreShufflePartitions else maxChildrenNumPartitions - } + val targetNumPartitions = requiredNumPartitions.getOrElse(childrenNumPartitions.max) - children.zip(requiredChildDistributions).map { - case (child, distribution) => - val targetPartitioning = createPartitioning(distribution, numPartitions) - if (child.outputPartitioning.guarantees(targetPartitioning)) { - child - } else { - child match { - // If child is an exchange, we replace it with - // a new one having targetPartitioning. - case ShuffleExchangeExec(_, c, _) => ShuffleExchangeExec(targetPartitioning, c) - case _ => ShuffleExchangeExec(targetPartitioning, child) - } + children = children.zip(requiredChildDistributions).zipWithIndex.map { + case ((child, distribution), index) if childrenIndexes.contains(index) => + if (child.outputPartitioning.numPartitions == targetNumPartitions) { + child + } else { + val defaultPartitioning = distribution.createPartitioning(targetNumPartitions) + child match { + // If child is an exchange, we replace it with a new one having defaultPartitioning. 
+ case ShuffleExchangeExec(_, c, _) => ShuffleExchangeExec(defaultPartitioning, c) + case _ => ShuffleExchangeExec(defaultPartitioning, child) + } } - } + + case ((child, _), _) => child } } @@ -248,13 +220,85 @@ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] { operator.withNewChildren(children) } + private def reorder( + leftKeys: Seq[Expression], + rightKeys: Seq[Expression], + expectedOrderOfKeys: Seq[Expression], + currentOrderOfKeys: Seq[Expression]): (Seq[Expression], Seq[Expression]) = { + val leftKeysBuffer = ArrayBuffer[Expression]() + val rightKeysBuffer = ArrayBuffer[Expression]() + + expectedOrderOfKeys.foreach(expression => { + val index = currentOrderOfKeys.indexWhere(e => e.semanticEquals(expression)) + leftKeysBuffer.append(leftKeys(index)) + rightKeysBuffer.append(rightKeys(index)) + }) + (leftKeysBuffer, rightKeysBuffer) + } + + private def reorderJoinKeys( + leftKeys: Seq[Expression], + rightKeys: Seq[Expression], + leftPartitioning: Partitioning, + rightPartitioning: Partitioning): (Seq[Expression], Seq[Expression]) = { + if (leftKeys.forall(_.deterministic) && rightKeys.forall(_.deterministic)) { + leftPartitioning match { + case HashPartitioning(leftExpressions, _) + if leftExpressions.length == leftKeys.length && + leftKeys.forall(x => leftExpressions.exists(_.semanticEquals(x))) => + reorder(leftKeys, rightKeys, leftExpressions, leftKeys) + + case _ => rightPartitioning match { + case HashPartitioning(rightExpressions, _) + if rightExpressions.length == rightKeys.length && + rightKeys.forall(x => rightExpressions.exists(_.semanticEquals(x))) => + reorder(leftKeys, rightKeys, rightExpressions, rightKeys) + + case _ => (leftKeys, rightKeys) + } + } + } else { + (leftKeys, rightKeys) + } + } + + /** + * When the physical operators are created for JOIN, the ordering of join keys is based on order + * in which the join keys appear in the user query. That might not match with the output + * partitioning of the join node's children (thus leading to extra sort / shuffle being + * introduced). This rule will change the ordering of the join keys to match with the + * partitioning of the join nodes' children. 
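A minimal illustration of the reordering just described, with expressions reduced to plain strings and semantic equality approximated by `==`:

```scala
// Join keys as written in the query, and the order the left child is
// hash-partitioned by.
val leftKeys = Seq("t1.a", "t1.b")
val rightKeys = Seq("t2.x", "t2.y")
val leftPartitioningExprs = Seq("t1.b", "t1.a")

// Mirrors reorder(): for each expected key, pull the matching pair into place.
val (newLeft, newRight) = leftPartitioningExprs.map { e =>
  val i = leftKeys.indexWhere(_ == e)
  (leftKeys(i), rightKeys(i))
}.unzip

assert(newLeft == Seq("t1.b", "t1.a") && newRight == Seq("t2.y", "t2.x"))
// The join keys now line up with the child partitioning, avoiding a shuffle.
```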
+ */ + private def reorderJoinPredicates(plan: SparkPlan): SparkPlan = { + plan.transformUp { + case BroadcastHashJoinExec(leftKeys, rightKeys, joinType, buildSide, condition, left, + right) => + val (reorderedLeftKeys, reorderedRightKeys) = + reorderJoinKeys(leftKeys, rightKeys, left.outputPartitioning, right.outputPartitioning) + BroadcastHashJoinExec(reorderedLeftKeys, reorderedRightKeys, joinType, buildSide, condition, + left, right) + + case ShuffledHashJoinExec(leftKeys, rightKeys, joinType, buildSide, condition, left, right) => + val (reorderedLeftKeys, reorderedRightKeys) = + reorderJoinKeys(leftKeys, rightKeys, left.outputPartitioning, right.outputPartitioning) + ShuffledHashJoinExec(reorderedLeftKeys, reorderedRightKeys, joinType, buildSide, condition, + left, right) + + case SortMergeJoinExec(leftKeys, rightKeys, joinType, condition, left, right) => + val (reorderedLeftKeys, reorderedRightKeys) = + reorderJoinKeys(leftKeys, rightKeys, left.outputPartitioning, right.outputPartitioning) + SortMergeJoinExec(reorderedLeftKeys, reorderedRightKeys, joinType, condition, left, right) + } + } + def apply(plan: SparkPlan): SparkPlan = plan.transformUp { - case operator @ ShuffleExchangeExec(partitioning, child, _) => - child.children match { - case ShuffleExchangeExec(childPartitioning, baseChild, _)::Nil => - if (childPartitioning.guarantees(partitioning)) child else operator + // TODO: remove this after we create a physical operator for `RepartitionByExpression`. + case operator @ ShuffleExchangeExec(upper: HashPartitioning, child, _) => + child.outputPartitioning match { + case lower: HashPartitioning if upper.semanticEquals(lower) => child case _ => operator } - case operator: SparkPlan => ensureDistributionAndOrdering(operator) + case operator: SparkPlan => + ensureDistributionAndOrdering(reorderJoinPredicates(operator)) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala index c96ed6ef41016..1918fcc5482db 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala @@ -134,19 +134,18 @@ case class BroadcastHashJoinExec( // create a name for HashedRelation val broadcastRelation = buildPlan.executeBroadcast[HashedRelation]() val broadcast = ctx.addReferenceObj("broadcast", broadcastRelation) - val relationTerm = ctx.freshName("relation") val clsName = broadcastRelation.value.getClass.getName // At the end of the task, we update the avg hash probe. 
val avgHashProbe = metricTerm(ctx, "avgHashProbe") - val addTaskListener = genTaskListener(avgHashProbe, relationTerm) - ctx.addMutableState(clsName, relationTerm, - s""" - | $relationTerm = (($clsName) $broadcast.value()).asReadOnlyCopy(); - | incPeakExecutionMemory($relationTerm.estimatedSize()); - | $addTaskListener - """.stripMargin) + // Inline mutable state since not many join operations in a task + val relationTerm = ctx.addMutableState(clsName, "relation", + v => s""" + | $v = (($clsName) $broadcast.value()).asReadOnlyCopy(); + | incPeakExecutionMemory($v.estimatedSize()); + | ${genTaskListener(avgHashProbe, v)} + """.stripMargin, forceInline = true) (broadcastRelation, relationTerm) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala index d98cf852a1b48..1465346eb802d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala @@ -368,7 +368,7 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap // The minimum key private var minKey = Long.MaxValue - // The maxinum key + // The maximum key private var maxKey = Long.MinValue // The array to store the key and offset of UnsafeRow in the page. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ReorderJoinPredicates.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ReorderJoinPredicates.scala deleted file mode 100644 index 534d8c5689c27..0000000000000 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ReorderJoinPredicates.scala +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution.joins - -import scala.collection.mutable.ArrayBuffer - -import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning} -import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.execution.SparkPlan - -/** - * When the physical operators are created for JOIN, the ordering of join keys is based on order - * in which the join keys appear in the user query. That might not match with the output - * partitioning of the join node's children (thus leading to extra sort / shuffle being - * introduced). This rule will change the ordering of the join keys to match with the - * partitioning of the join nodes' children. 
- */ -class ReorderJoinPredicates extends Rule[SparkPlan] { - private def reorderJoinKeys( - leftKeys: Seq[Expression], - rightKeys: Seq[Expression], - leftPartitioning: Partitioning, - rightPartitioning: Partitioning): (Seq[Expression], Seq[Expression]) = { - - def reorder( - expectedOrderOfKeys: Seq[Expression], - currentOrderOfKeys: Seq[Expression]): (Seq[Expression], Seq[Expression]) = { - val leftKeysBuffer = ArrayBuffer[Expression]() - val rightKeysBuffer = ArrayBuffer[Expression]() - - expectedOrderOfKeys.foreach(expression => { - val index = currentOrderOfKeys.indexWhere(e => e.semanticEquals(expression)) - leftKeysBuffer.append(leftKeys(index)) - rightKeysBuffer.append(rightKeys(index)) - }) - (leftKeysBuffer, rightKeysBuffer) - } - - if (leftKeys.forall(_.deterministic) && rightKeys.forall(_.deterministic)) { - leftPartitioning match { - case HashPartitioning(leftExpressions, _) - if leftExpressions.length == leftKeys.length && - leftKeys.forall(x => leftExpressions.exists(_.semanticEquals(x))) => - reorder(leftExpressions, leftKeys) - - case _ => rightPartitioning match { - case HashPartitioning(rightExpressions, _) - if rightExpressions.length == rightKeys.length && - rightKeys.forall(x => rightExpressions.exists(_.semanticEquals(x))) => - reorder(rightExpressions, rightKeys) - - case _ => (leftKeys, rightKeys) - } - } - } else { - (leftKeys, rightKeys) - } - } - - def apply(plan: SparkPlan): SparkPlan = plan.transformUp { - case BroadcastHashJoinExec(leftKeys, rightKeys, joinType, buildSide, condition, left, right) => - val (reorderedLeftKeys, reorderedRightKeys) = - reorderJoinKeys(leftKeys, rightKeys, left.outputPartitioning, right.outputPartitioning) - BroadcastHashJoinExec(reorderedLeftKeys, reorderedRightKeys, joinType, buildSide, condition, - left, right) - - case ShuffledHashJoinExec(leftKeys, rightKeys, joinType, buildSide, condition, left, right) => - val (reorderedLeftKeys, reorderedRightKeys) = - reorderJoinKeys(leftKeys, rightKeys, left.outputPartitioning, right.outputPartitioning) - ShuffledHashJoinExec(reorderedLeftKeys, reorderedRightKeys, joinType, buildSide, condition, - left, right) - - case SortMergeJoinExec(leftKeys, rightKeys, joinType, condition, left, right) => - val (reorderedLeftKeys, reorderedRightKeys) = - reorderJoinKeys(leftKeys, rightKeys, left.outputPartitioning, right.outputPartitioning) - SortMergeJoinExec(reorderedLeftKeys, reorderedRightKeys, joinType, condition, left, right) - } -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoinExec.scala index 66e8031bb5191..897a4dae39f32 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoinExec.scala @@ -46,7 +46,7 @@ case class ShuffledHashJoinExec( "avgHashProbe" -> SQLMetrics.createAverageMetric(sparkContext, "avg hash probe")) override def requiredChildDistribution: Seq[Distribution] = - ClusteredDistribution(leftKeys) :: ClusteredDistribution(rightKeys) :: Nil + HashClusteredDistribution(leftKeys) :: HashClusteredDistribution(rightKeys) :: Nil private def buildHashedRelation(iter: Iterator[InternalRow]): HashedRelation = { val buildDataSize = longMetric("buildDataSize") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala index 9c08ec71c1fde..2de2f30eb05d3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala @@ -78,7 +78,7 @@ case class SortMergeJoinExec( } override def requiredChildDistribution: Seq[Distribution] = - ClusteredDistribution(leftKeys) :: ClusteredDistribution(rightKeys) :: Nil + HashClusteredDistribution(leftKeys) :: HashClusteredDistribution(rightKeys) :: Nil override def outputOrdering: Seq[SortOrder] = joinType match { // For inner join, orders of both sides keys should be kept. @@ -422,10 +422,9 @@ case class SortMergeJoinExec( */ private def genScanner(ctx: CodegenContext): (String, String) = { // Create class member for next row from both sides. - val leftRow = ctx.freshName("leftRow") - ctx.addMutableState("InternalRow", leftRow) - val rightRow = ctx.freshName("rightRow") - ctx.addMutableState("InternalRow", rightRow, s"$rightRow = null;") + // Inline mutable state since not many join operations in a task + val leftRow = ctx.addMutableState("InternalRow", "leftRow", forceInline = true) + val rightRow = ctx.addMutableState("InternalRow", "rightRow", forceInline = true) // Create variables for join keys from both sides. val leftKeyVars = createJoinKey(ctx, leftRow, leftKeys, left.output) @@ -436,14 +435,14 @@ case class SortMergeJoinExec( val rightKeyVars = copyKeys(ctx, rightKeyTmpVars) // A list to hold all matched rows from right side. - val matches = ctx.freshName("matches") val clsName = classOf[ExternalAppendOnlyUnsafeRowArray].getName val spillThreshold = getSpillThreshold val inMemoryThreshold = getInMemoryThreshold - ctx.addMutableState(clsName, matches, - s"$matches = new $clsName($inMemoryThreshold, $spillThreshold);") + // Inline mutable state since not many join operations in a task + val matches = ctx.addMutableState(clsName, "matches", + v => s"$v = new $clsName($inMemoryThreshold, $spillThreshold);", forceInline = true) // Copy the left keys as class members so they could be used in next function call. val matchedKeyVars = copyKeys(ctx, leftKeyVars) @@ -507,32 +506,38 @@ case class SortMergeJoinExec( } /** - * Creates variables for left part of result row. + * Creates variables and declarations for left part of result row. * * In order to defer the access after condition and also only access once in the loop, * the variables should be declared separately from accessing the columns, we can't use the * codegen of BoundReference here. */ - private def createLeftVars(ctx: CodegenContext, leftRow: String): Seq[ExprCode] = { + private def createLeftVars(ctx: CodegenContext, leftRow: String): (Seq[ExprCode], Seq[String]) = { ctx.INPUT_ROW = leftRow left.output.zipWithIndex.map { case (a, i) => val value = ctx.freshName("value") val valueCode = ctx.getValue(leftRow, a.dataType, i.toString) - // declare it as class member, so we can access the column before or in the loop. - ctx.addMutableState(ctx.javaType(a.dataType), value) + val javaType = ctx.javaType(a.dataType) + val defaultValue = ctx.defaultValue(a.dataType) if (a.nullable) { val isNull = ctx.freshName("isNull") - ctx.addMutableState(ctx.JAVA_BOOLEAN, isNull) val code = s""" |$isNull = $leftRow.isNullAt($i); - |$value = $isNull ? ${ctx.defaultValue(a.dataType)} : ($valueCode); + |$value = $isNull ? 
$defaultValue : ($valueCode); + """.stripMargin + val leftVarsDecl = + s""" + |boolean $isNull = false; + |$javaType $value = $defaultValue; """.stripMargin - ExprCode(code, isNull, value) + (ExprCode(code, isNull, value), leftVarsDecl) } else { - ExprCode(s"$value = $valueCode;", "false", value) + val code = s"$value = $valueCode;" + val leftVarsDecl = s"""$javaType $value = $defaultValue;""" + (ExprCode(code, "false", value), leftVarsDecl) } - } + }.unzip } /** @@ -572,15 +577,16 @@ case class SortMergeJoinExec( override def needCopyResult: Boolean = true override def doProduce(ctx: CodegenContext): String = { - val leftInput = ctx.freshName("leftInput") - ctx.addMutableState("scala.collection.Iterator", leftInput, s"$leftInput = inputs[0];") - val rightInput = ctx.freshName("rightInput") - ctx.addMutableState("scala.collection.Iterator", rightInput, s"$rightInput = inputs[1];") + // Inline mutable state since not many join operations in a task + val leftInput = ctx.addMutableState("scala.collection.Iterator", "leftInput", + v => s"$v = inputs[0];", forceInline = true) + val rightInput = ctx.addMutableState("scala.collection.Iterator", "rightInput", + v => s"$v = inputs[1];", forceInline = true) val (leftRow, matches) = genScanner(ctx) // Create variables for row from both sides. - val leftVars = createLeftVars(ctx, leftRow) + val (leftVars, leftVarDecl) = createLeftVars(ctx, leftRow) val rightRow = ctx.freshName("rightRow") val rightVars = createRightVar(ctx, rightRow) @@ -617,6 +623,7 @@ case class SortMergeJoinExec( s""" |while (findNextInnerJoinRows($leftInput, $rightInput)) { + | ${leftVarDecl.mkString("\n")} | ${beforeLoop.trim} | scala.collection.Iterator $iterator = $matches.generateIterator(); | while ($iterator.hasNext()) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala index 443b6898aaf75..8b24cad6cc597 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala @@ -74,8 +74,7 @@ trait BaseLimitExec extends UnaryExecNode with CodegenSupport { } override def doConsume(ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = { - val stopEarly = ctx.freshName("stopEarly") - ctx.addMutableState(ctx.JAVA_BOOLEAN, stopEarly, s"$stopEarly = false;") + val stopEarly = ctx.addMutableState(ctx.JAVA_BOOLEAN, "stopEarly") // init as stopEarly = false ctx.addNewFunction("stopEarly", s""" @Override @@ -83,8 +82,7 @@ trait BaseLimitExec extends UnaryExecNode with CodegenSupport { return $stopEarly; } """, inlineToOuterClass = true) - val countTerm = ctx.freshName("count") - ctx.addMutableState(ctx.JAVA_INT, countTerm, s"$countTerm = 0;") + val countTerm = ctx.addMutableState(ctx.JAVA_INT, "count") // init as count = 0 s""" | if ($countTerm < $limit) { | $countTerm += 1; diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/objects.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/objects.scala index 32d9ea1871c3d..351acb202861b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/objects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/objects.scala @@ -462,7 +462,7 @@ case class CoGroupExec( right: SparkPlan) extends BinaryExecNode with ObjectProducerExec { override def requiredChildDistribution: Seq[Distribution] = - ClusteredDistribution(leftGroup) :: ClusteredDistribution(rightGroup) :: Nil + 
HashClusteredDistribution(leftGroup) :: HashClusteredDistribution(rightGroup) :: Nil override def requiredChildOrdering: Seq[Seq[SortOrder]] = leftGroup.map(SortOrder(_, Ascending)) :: rightGroup.map(SortOrder(_, Ascending)) :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala index 9a94d771a01b0..dc5ba96e69aec 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala @@ -24,14 +24,14 @@ import java.util.concurrent.atomic.AtomicBoolean import scala.collection.JavaConverters._ import org.apache.arrow.vector.VectorSchemaRoot -import org.apache.arrow.vector.stream.{ArrowStreamReader, ArrowStreamWriter} +import org.apache.arrow.vector.ipc.{ArrowStreamReader, ArrowStreamWriter} import org.apache.spark._ import org.apache.spark.api.python._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.arrow.{ArrowUtils, ArrowWriter} -import org.apache.spark.sql.execution.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector} import org.apache.spark.sql.types._ +import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector} import org.apache.spark.util.Utils /** @@ -74,13 +74,9 @@ class ArrowPythonRunner( val root = VectorSchemaRoot.create(arrowSchema, allocator) val arrowWriter = ArrowWriter.create(root) - var closed = false - context.addTaskCompletionListener { _ => - if (!closed) { - root.close() - allocator.close() - } + root.close() + allocator.close() } val writer = new ArrowStreamWriter(root, null, dataOut) @@ -102,7 +98,6 @@ class ArrowPythonRunner( writer.end() root.close() allocator.close() - closed = true } } } @@ -126,18 +121,11 @@ class ArrowPythonRunner( private var schema: StructType = _ private var vectors: Array[ColumnVector] = _ - private var closed = false - context.addTaskCompletionListener { _ => - // todo: we need something like `reader.end()`, which release all the resources, but leave - // the input stream open. `reader.close()` will close the socket and we can't reuse worker. - // So here we simply not close the reader, which is problematic. - if (!closed) { - if (root != null) { - root.close() - } - allocator.close() + if (reader != null) { + reader.close(false) } + allocator.close() } private var batchLoaded = true @@ -154,9 +142,8 @@ class ArrowPythonRunner( batch.setNumRows(root.getRowCount) batch } else { - root.close() + reader.close(false) allocator.close() - closed = true // Reach end of stream. Call `read()` again to read control data. 
read() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExec.scala index 26ee25f633ea4..f4d83e8dc7c2b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExec.scala @@ -79,16 +79,19 @@ case class BatchEvalPythonExec(udfs: Seq[PythonUDF], output: Seq[Attribute], chi } else { StructType(udfs.map(u => StructField("", u.dataType, u.nullable))) } + + val fromJava = EvaluatePython.makeFromJava(resultType) + outputIterator.flatMap { pickedResult => val unpickledBatch = unpickle.loads(pickedResult) unpickledBatch.asInstanceOf[java.util.ArrayList[Any]].asScala }.map { result => if (udfs.length == 1) { // fast path for single UDF - mutableRow(0) = EvaluatePython.fromJava(result, resultType) + mutableRow(0) = fromJava(result) mutableRow } else { - EvaluatePython.fromJava(result, resultType).asInstanceOf[InternalRow] + fromJava(result).asInstanceOf[InternalRow] } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala index 9bbfa6018ba77..520afad287648 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala @@ -83,82 +83,134 @@ object EvaluatePython { } /** - * Converts `obj` to the type specified by the data type, or returns null if the type of obj is - * unexpected. Because Python doesn't enforce the type. + * Makes a converter that converts `obj` to the type specified by the data type, or returns + * null if the type of obj is unexpected, because Python doesn't enforce types.
*/ - def fromJava(obj: Any, dataType: DataType): Any = (obj, dataType) match { - case (null, _) => null - - case (c: Boolean, BooleanType) => c + def makeFromJava(dataType: DataType): Any => Any = dataType match { + case BooleanType => (obj: Any) => nullSafeConvert(obj) { + case b: Boolean => b + } - case (c: Byte, ByteType) => c - case (c: Short, ByteType) => c.toByte - case (c: Int, ByteType) => c.toByte - case (c: Long, ByteType) => c.toByte + case ByteType => (obj: Any) => nullSafeConvert(obj) { + case c: Byte => c + case c: Short => c.toByte + case c: Int => c.toByte + case c: Long => c.toByte + } - case (c: Byte, ShortType) => c.toShort - case (c: Short, ShortType) => c - case (c: Int, ShortType) => c.toShort - case (c: Long, ShortType) => c.toShort + case ShortType => (obj: Any) => nullSafeConvert(obj) { + case c: Byte => c.toShort + case c: Short => c + case c: Int => c.toShort + case c: Long => c.toShort + } - case (c: Byte, IntegerType) => c.toInt - case (c: Short, IntegerType) => c.toInt - case (c: Int, IntegerType) => c - case (c: Long, IntegerType) => c.toInt + case IntegerType => (obj: Any) => nullSafeConvert(obj) { + case c: Byte => c.toInt + case c: Short => c.toInt + case c: Int => c + case c: Long => c.toInt + } - case (c: Byte, LongType) => c.toLong - case (c: Short, LongType) => c.toLong - case (c: Int, LongType) => c.toLong - case (c: Long, LongType) => c + case LongType => (obj: Any) => nullSafeConvert(obj) { + case c: Byte => c.toLong + case c: Short => c.toLong + case c: Int => c.toLong + case c: Long => c + } - case (c: Float, FloatType) => c - case (c: Double, FloatType) => c.toFloat + case FloatType => (obj: Any) => nullSafeConvert(obj) { + case c: Float => c + case c: Double => c.toFloat + } - case (c: Float, DoubleType) => c.toDouble - case (c: Double, DoubleType) => c + case DoubleType => (obj: Any) => nullSafeConvert(obj) { + case c: Float => c.toDouble + case c: Double => c + } - case (c: java.math.BigDecimal, dt: DecimalType) => Decimal(c, dt.precision, dt.scale) + case dt: DecimalType => (obj: Any) => nullSafeConvert(obj) { + case c: java.math.BigDecimal => Decimal(c, dt.precision, dt.scale) + } - case (c: Int, DateType) => c + case DateType => (obj: Any) => nullSafeConvert(obj) { + case c: Int => c + } - case (c: Long, TimestampType) => c - // Py4J serializes values between MIN_INT and MAX_INT as Ints, not Longs - case (c: Int, TimestampType) => c.toLong + case TimestampType => (obj: Any) => nullSafeConvert(obj) { + case c: Long => c + // Py4J serializes values between MIN_INT and MAX_INT as Ints, not Longs + case c: Int => c.toLong + } - case (c, StringType) => UTF8String.fromString(c.toString) + case StringType => (obj: Any) => nullSafeConvert(obj) { + case _ => UTF8String.fromString(obj.toString) + } - case (c: String, BinaryType) => c.getBytes(StandardCharsets.UTF_8) - case (c, BinaryType) if c.getClass.isArray && c.getClass.getComponentType.getName == "byte" => c + case BinaryType => (obj: Any) => nullSafeConvert(obj) { + case c: String => c.getBytes(StandardCharsets.UTF_8) + case c if c.getClass.isArray && c.getClass.getComponentType.getName == "byte" => c + } - case (c: java.util.List[_], ArrayType(elementType, _)) => - new GenericArrayData(c.asScala.map { e => fromJava(e, elementType)}.toArray) + case ArrayType(elementType, _) => + val elementFromJava = makeFromJava(elementType) - case (c, ArrayType(elementType, _)) if c.getClass.isArray => - new GenericArrayData(c.asInstanceOf[Array[_]].map(e => fromJava(e, elementType))) + (obj: Any) => 
nullSafeConvert(obj) { + case c: java.util.List[_] => + new GenericArrayData(c.asScala.map { e => elementFromJava(e) }.toArray) + case c if c.getClass.isArray => + new GenericArrayData(c.asInstanceOf[Array[_]].map(e => elementFromJava(e))) + } - case (javaMap: java.util.Map[_, _], MapType(keyType, valueType, _)) => - ArrayBasedMapData( - javaMap, - (key: Any) => fromJava(key, keyType), - (value: Any) => fromJava(value, valueType)) + case MapType(keyType, valueType, _) => + val keyFromJava = makeFromJava(keyType) + val valueFromJava = makeFromJava(valueType) + + (obj: Any) => nullSafeConvert(obj) { + case javaMap: java.util.Map[_, _] => + ArrayBasedMapData( + javaMap, + (key: Any) => keyFromJava(key), + (value: Any) => valueFromJava(value)) + } - case (c, StructType(fields)) if c.getClass.isArray => - val array = c.asInstanceOf[Array[_]] - if (array.length != fields.length) { - throw new IllegalStateException( - s"Input row doesn't have expected number of values required by the schema. " + - s"${fields.length} fields are required while ${array.length} values are provided." - ) + case StructType(fields) => + val fieldsFromJava = fields.map(f => makeFromJava(f.dataType)).toArray + + (obj: Any) => nullSafeConvert(obj) { + case c if c.getClass.isArray => + val array = c.asInstanceOf[Array[_]] + if (array.length != fields.length) { + throw new IllegalStateException( + s"Input row doesn't have expected number of values required by the schema. " + + s"${fields.length} fields are required while ${array.length} values are provided." + ) + } + + val row = new GenericInternalRow(fields.length) + var i = 0 + while (i < fields.length) { + row(i) = fieldsFromJava(i)(array(i)) + i += 1 + } + row } - new GenericInternalRow(array.zip(fields).map { - case (e, f) => fromJava(e, f.dataType) - }) - case (_, udt: UserDefinedType[_]) => fromJava(obj, udt.sqlType) + case udt: UserDefinedType[_] => makeFromJava(udt.sqlType) + + case other => (obj: Any) => nullSafeConvert(obj)(PartialFunction.empty) + } - // all other unexpected type should be null, or we will have runtime exception - // TODO(davies): we could improve this by try to cast the object to expected type - case (c, _) => null + private def nullSafeConvert(input: Any)(f: PartialFunction[Any, Any]): Any = { + if (input == null) { + null + } else { + f.applyOrElse(input, { + // all other unexpected types should be null, or we will have a runtime exception + // TODO(davies): we could improve this by trying to cast the object to the expected type + _: Any => null + }) + } } private val module = "pyspark.sql.types" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala index f5a4cbc4793e3..2f53fe788c7d0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala @@ -202,12 +202,12 @@ object ExtractPythonUDFs extends Rule[SparkPlan] with PredicateHelper { private def trySplitFilter(plan: SparkPlan): SparkPlan = { plan match { case filter: FilterExec => - val (candidates, containingNonDeterministic) = - splitConjunctivePredicates(filter.condition).span(_.deterministic) + val (candidates, nonDeterministic) = + splitConjunctivePredicates(filter.condition).partition(_.deterministic) val (pushDown, rest) = candidates.partition(!hasPythonUDF(_)) if (pushDown.nonEmpty) { val newChild = 
FilterExec(pushDown.reduceLeft(And), filter.child) - FilterExec((rest ++ containingNonDeterministic).reduceLeft(And), newChild) + FilterExec((rest ++ nonDeterministic).reduceLeft(And), newChild) } else { filter } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonUDF.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonUDF.scala index ef27fbc2db7d9..d3f743d9eb61e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonUDF.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonUDF.scala @@ -29,9 +29,12 @@ case class PythonUDF( func: PythonFunction, dataType: DataType, children: Seq[Expression], - evalType: Int) + evalType: Int, + udfDeterministic: Boolean) extends Expression with Unevaluable with NonSQLExpression with UserDefinedExpression { + override lazy val deterministic: Boolean = udfDeterministic && children.forall(_.deterministic) + override def toString: String = s"$name(${children.mkString(", ")})" override def nullable: Boolean = true diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/UserDefinedPythonFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/UserDefinedPythonFunction.scala index 348e49e473ed3..50dca32cb7861 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/UserDefinedPythonFunction.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/UserDefinedPythonFunction.scala @@ -29,10 +29,11 @@ case class UserDefinedPythonFunction( name: String, func: PythonFunction, dataType: DataType, - pythonEvalType: Int) { + pythonEvalType: Int, + udfDeterministic: Boolean) { def builder(e: Seq[Expression]): PythonUDF = { - PythonUDF(name, func, dataType, e, pythonEvalType) + PythonUDF(name, func, dataType, e, pythonEvalType, udfDeterministic) } /** Returns a [[Column]] that will evaluate to calling this UDF with the given input. */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/BaseStreamingSink.java b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/BaseStreamingSink.java new file mode 100644 index 0000000000000..ac96c2765368f --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/BaseStreamingSink.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming; + +/** + * The shared interface between V1 and V2 streaming sinks. + * + * This is a temporary interface for compatibility during migration. It should not be implemented + * directly, and will be removed in future versions. 
+ */ +public interface BaseStreamingSink { +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/BaseStreamingSource.java b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/BaseStreamingSource.java new file mode 100644 index 0000000000000..c44b8af2552f0 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/BaseStreamingSource.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming; + +/** + * The shared interface between V1 streaming sources and V2 streaming readers. + * + * This is a temporary interface for compatibility during migration. It should not be implemented + * directly, and will be removed in future versions. + */ +public interface BaseStreamingSource { + /** Stop this source and free any resources it has allocated. */ + void stop(); +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/BatchCommitLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CommitLog.scala similarity index 93% rename from sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/BatchCommitLog.scala rename to sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CommitLog.scala index 5e24e8fc4e3cc..5b114242558dc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/BatchCommitLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CommitLog.scala @@ -42,10 +42,10 @@ import org.apache.spark.sql.SparkSession * line 1: version * line 2: metadata (optional json string) */ -class BatchCommitLog(sparkSession: SparkSession, path: String) +class CommitLog(sparkSession: SparkSession, path: String) extends HDFSMetadataLog[String](sparkSession, path) { - import BatchCommitLog._ + import CommitLog._ def add(batchId: Long): Unit = { super.add(batchId, EMPTY_JSON) @@ -53,7 +53,7 @@ class BatchCommitLog(sparkSession: SparkSession, path: String) override def add(batchId: Long, metadata: String): Boolean = { throw new UnsupportedOperationException( - "BatchCommitLog does not take any metadata, use 'add(batchId)' instead") + "CommitLog does not take any metadata, use 'add(batchId)' instead") } override protected def deserialize(in: InputStream): String = { @@ -76,7 +76,7 @@ class BatchCommitLog(sparkSession: SparkSession, path: String) } } -object BatchCommitLog { +object CommitLog { private val VERSION = 1 private val EMPTY_JSON = "{}" } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala index 6bd0696622005..2715fa93d0e98 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala @@ -118,13 +118,14 @@ class FileStreamSink( throw new RuntimeException(s"Partition column $col not found in schema ${data.schema}") } } + val qe = data.queryExecution FileFormatWriter.write( sparkSession = sparkSession, - queryExecution = data.queryExecution, + plan = qe.executedPlan, fileFormat = fileFormat, committer = committer, - outputSpec = FileFormatWriter.OutputSpec(path, Map.empty), + outputSpec = FileFormatWriter.OutputSpec(path, Map.empty, qe.analyzed.output), hadoopConf = hadoopConf, partitionColumns = partitionColumns, bucketSpec = None, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSourceOffset.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSourceOffset.scala index 06d0fe6c18c1e..a2b49d944a688 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSourceOffset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSourceOffset.scala @@ -24,6 +24,7 @@ import org.json4s.jackson.Serialization /** * Offset for the [[FileStreamSource]]. + * * @param logOffset Position in the [[FileStreamSourceLog]] */ case class FileStreamSourceOffset(logOffset: Long) extends Offset { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala index 29f38fab3f896..80769d728b8f1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala @@ -23,8 +23,10 @@ import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, Attribut import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, Distribution} import org.apache.spark.sql.execution._ +import org.apache.spark.sql.execution.streaming.GroupStateImpl.NO_TIMESTAMP import org.apache.spark.sql.execution.streaming.state._ import org.apache.spark.sql.streaming.{GroupStateTimeout, OutputMode} +import org.apache.spark.sql.types.IntegerType import org.apache.spark.util.CompletionIterator /** @@ -60,8 +62,27 @@ case class FlatMapGroupsWithStateExec( import GroupStateImpl._ private val isTimeoutEnabled = timeoutConf != NoTimeout - val stateManager = new FlatMapGroupsWithState_StateManager(stateEncoder, isTimeoutEnabled) - val watermarkPresent = child.output.exists { + private val timestampTimeoutAttribute = + AttributeReference("timeoutTimestamp", dataType = IntegerType, nullable = false)() + private val stateAttributes: Seq[Attribute] = { + val encSchemaAttribs = stateEncoder.schema.toAttributes + if (isTimeoutEnabled) encSchemaAttribs :+ timestampTimeoutAttribute else encSchemaAttribs + } + // Get the serializer for the state, taking into account whether we need to save timestamps + private val stateSerializer = { + val encoderSerializer = stateEncoder.namedExpressions + if (isTimeoutEnabled) { + encoderSerializer :+ Literal(GroupStateImpl.NO_TIMESTAMP) + } else { + encoderSerializer + } + } + // Get the deserializer for the state. 
Note that this must be done in the driver, as + // resolving and binding of deserializer expressions to the encoded type can be safely done + // only in the driver. + private val stateDeserializer = stateEncoder.resolveAndBind().deserializer + + private val watermarkPresent = child.output.exists { case a: Attribute if a.metadata.contains(EventTimeWatermark.delayKey) => true case _ => false } @@ -92,11 +113,11 @@ case class FlatMapGroupsWithStateExec( child.execute().mapPartitionsWithStateStore[InternalRow]( getStateInfo, groupingAttributes.toStructType, - stateManager.stateSchema, + stateAttributes.toStructType, indexOrdinal = None, sqlContext.sessionState, Some(sqlContext.streams.stateStoreCoordinator)) { case (store, iter) => - val processor = new InputProcessor(store) + val updater = new StateStoreUpdater(store) // If timeout is based on event time, then filter late data based on watermark val filteredIter = watermarkPredicateForData match { @@ -111,7 +132,7 @@ case class FlatMapGroupsWithStateExec( // all the data has been processed. This is to ensure that the timeout information of all // the keys with data is updated before they are processed for timeouts. val outputIterator = - processor.processNewData(filteredIter) ++ processor.processTimedOutState() + updater.updateStateForKeysWithData(filteredIter) ++ updater.updateStateForTimedOutKeys() // Return an iterator of all the rows generated by all the keys, such that when fully // consumed, all the state updates will be committed by the state store @@ -126,7 +147,7 @@ case class FlatMapGroupsWithStateExec( } /** Helper class to update the state store */ - class InputProcessor(store: StateStore) { + class StateStoreUpdater(store: StateStore) { // Converters for translating input keys, values, output data between rows and Java objects private val getKeyObj = @@ -135,6 +156,14 @@ case class FlatMapGroupsWithStateExec( ObjectOperator.deserializeRowToObject(valueDeserializer, dataAttributes) private val getOutputRow = ObjectOperator.wrapObjectToRow(outputObjAttr.dataType) + // Converters for translating state between rows and Java objects + private val getStateObjFromRow = ObjectOperator.deserializeRowToObject( + stateDeserializer, stateAttributes) + private val getStateRowFromObj = ObjectOperator.serializeObjectToRow(stateSerializer) + + // Index of the additional metadata fields in the state row + private val timeoutTimestampIndex = stateAttributes.indexOf(timestampTimeoutAttribute) + // Metrics private val numUpdatedStateRows = longMetric("numUpdatedStateRows") private val numOutputRows = longMetric("numOutputRows") @@ -143,19 +172,20 @@ case class FlatMapGroupsWithStateExec( * For every group, get the key, values and corresponding state and call the function, * and return an iterator of rows */ - def processNewData(dataIter: Iterator[InternalRow]): Iterator[InternalRow] = { + def updateStateForKeysWithData(dataIter: Iterator[InternalRow]): Iterator[InternalRow] = { val groupedIter = GroupedIterator(dataIter, groupingAttributes, child.output) groupedIter.flatMap { case (keyRow, valueRowIter) => val keyUnsafeRow = keyRow.asInstanceOf[UnsafeRow] callFunctionAndUpdateState( - stateManager.getState(store, keyUnsafeRow), + keyUnsafeRow, valueRowIter, + store.get(keyUnsafeRow), hasTimedOut = false) } } /** Find the groups that have timeout set and are timing out right now, and call the function */ - def processTimedOutState(): Iterator[InternalRow] = { + def updateStateForTimedOutKeys(): Iterator[InternalRow] = { if (isTimeoutEnabled) { val 
timeoutThreshold = timeoutConf match { case ProcessingTimeTimeout => batchTimestampMs.get @@ -164,11 +194,12 @@ case class FlatMapGroupsWithStateExec( throw new IllegalStateException( s"Cannot filter timed out keys for $timeoutConf") } - val timingOutKeys = stateManager.getAllState(store).filter { state => - state.timeoutTimestamp != NO_TIMESTAMP && state.timeoutTimestamp < timeoutThreshold + val timingOutKeys = store.getRange(None, None).filter { rowPair => + val timeoutTimestamp = getTimeoutTimestamp(rowPair.value) + timeoutTimestamp != NO_TIMESTAMP && timeoutTimestamp < timeoutThreshold } - timingOutKeys.flatMap { stateData => - callFunctionAndUpdateState(stateData, Iterator.empty, hasTimedOut = true) + timingOutKeys.flatMap { rowPair => + callFunctionAndUpdateState(rowPair.key, Iterator.empty, rowPair.value, hasTimedOut = true) } } else Iterator.empty } @@ -178,19 +209,22 @@ case class FlatMapGroupsWithStateExec( * iterator. Note that the store updating is lazy, that is, the store will be updated only * after the returned iterator is fully consumed. * - * @param stateData All the data related to the state to be updated + * @param keyRow Row representing the key, cannot be null * @param valueRowIter Iterator of values as rows, cannot be null, but can be empty + * @param prevStateRow Row representing the previous state, can be null * @param hasTimedOut Whether this function is being called for a key timeout */ private def callFunctionAndUpdateState( - stateData: FlatMapGroupsWithState_StateData, + keyRow: UnsafeRow, valueRowIter: Iterator[InternalRow], + prevStateRow: UnsafeRow, hasTimedOut: Boolean): Iterator[InternalRow] = { - val keyObj = getKeyObj(stateData.keyRow) // convert key to objects + val keyObj = getKeyObj(keyRow) // convert key to objects val valueObjIter = valueRowIter.map(getValueObj.apply) // convert value rows to objects - val groupState = GroupStateImpl.createForStreaming( - Option(stateData.stateObj), + val stateObj = getStateObj(prevStateRow) + val keyedState = GroupStateImpl.createForStreaming( + Option(stateObj), batchTimestampMs.getOrElse(NO_TIMESTAMP), eventTimeWatermark.getOrElse(NO_TIMESTAMP), timeoutConf, @@ -198,24 +232,50 @@ case class FlatMapGroupsWithStateExec( watermarkPresent) // Call function, get the returned objects and convert them to rows - val mappedIterator = func(keyObj, valueObjIter, groupState).map { obj => + val mappedIterator = func(keyObj, valueObjIter, keyedState).map { obj => numOutputRows += 1 getOutputRow(obj) } // When the iterator is consumed, then write changes to state def onIteratorCompletion: Unit = { - if (groupState.hasRemoved && groupState.getTimeoutTimestamp == NO_TIMESTAMP) { - stateManager.removeState(store, stateData.keyRow) + + val currentTimeoutTimestamp = keyedState.getTimeoutTimestamp + // If the state has not yet been set but timeout has been set, then + // we have to generate a row to save the timeout. However, attempting to serialize + // null using case class encoder throws - + // java.lang.NullPointerException: Null value appeared in non-nullable field: + // If the schema is inferred from a Scala tuple / case class, or a Java bean, please + // try to use scala.Option[_] or other nullable types. 
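// A minimal sketch of the invariant enforced just below, pulled out as a pure check.
// The method name is illustrative, and NO_TIMESTAMP is a local placeholder for
// GroupStateImpl.NO_TIMESTAMP:
def assertTimeoutRequiresState(stateExists: Boolean, timeoutTimestamp: Long): Unit = {
  val NO_TIMESTAMP = -1L // placeholder for GroupStateImpl.NO_TIMESTAMP
  if (!stateExists && timeoutTimestamp != NO_TIMESTAMP) {
    throw new IllegalStateException(
      "Cannot set timeout when state is not defined, that is, state has not been " +
        "initialized or has been removed")
  }
}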
+ if (!keyedState.exists && currentTimeoutTimestamp != NO_TIMESTAMP) { + throw new IllegalStateException( + "Cannot set timeout when state is not defined, that is, state has not been " + + "initialized or has been removed") + } + + if (keyedState.hasRemoved) { + store.remove(keyRow) numUpdatedStateRows += 1 + } else { - val currentTimeoutTimestamp = groupState.getTimeoutTimestamp - val hasTimeoutChanged = currentTimeoutTimestamp != stateData.timeoutTimestamp - val shouldWriteState = groupState.hasUpdated || groupState.hasRemoved || hasTimeoutChanged + val previousTimeoutTimestamp = getTimeoutTimestamp(prevStateRow) + val stateRowToWrite = if (keyedState.hasUpdated) { + getStateRow(keyedState.get) + } else { + prevStateRow + } + + val hasTimeoutChanged = currentTimeoutTimestamp != previousTimeoutTimestamp + val shouldWriteState = keyedState.hasUpdated || hasTimeoutChanged if (shouldWriteState) { - val updatedStateObj = if (groupState.exists) groupState.get else null - stateManager.putState(store, stateData.keyRow, updatedStateObj, currentTimeoutTimestamp) + if (stateRowToWrite == null) { + // This should never happen because checks in GroupStateImpl should avoid cases + // where empty state would need to be written + throw new IllegalStateException("Attempting to write empty state") + } + setTimeoutTimestamp(stateRowToWrite, currentTimeoutTimestamp) + store.put(keyRow, stateRowToWrite) numUpdatedStateRows += 1 } } @@ -224,5 +284,28 @@ case class FlatMapGroupsWithStateExec( // Return an iterator of rows such that, when fully consumed, the updated state value will be saved CompletionIterator[InternalRow, Iterator[InternalRow]](mappedIterator, onIteratorCompletion) } + + /** Returns the state as Java object if defined */ + def getStateObj(stateRow: UnsafeRow): Any = { + if (stateRow != null) getStateObjFromRow(stateRow) else null + } + + /** Returns the row for an updated state */ + def getStateRow(obj: Any): UnsafeRow = { + assert(obj != null) + getStateRowFromObj(obj) + } + + /** Returns the timeout timestamp of a state row, if set */ + def getTimeoutTimestamp(stateRow: UnsafeRow): Long = { + if (isTimeoutEnabled && stateRow != null) { + stateRow.getLong(timeoutTimestampIndex) + } else NO_TIMESTAMP + } + + /** Sets the timeout timestamp in a state row */ + def setTimeoutTimestamp(stateRow: UnsafeRow, timeoutTimestamp: Long): Unit = { + if (isTimeoutEnabled) stateRow.setLong(timeoutTimestampIndex, timeoutTimestamp) + } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala index 43cf0ef1da8ca..6e8154d58d4c6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala @@ -266,6 +266,20 @@ class HDFSMetadataLog[T <: AnyRef : ClassTag](sparkSession: SparkSession, path: } } + /** + * Removes all log entries later than thresholdBatchId (exclusive). 
+ */ + def purgeAfter(thresholdBatchId: Long): Unit = { + val batchIds = fileManager.list(metadataPath, batchFilesFilter) + .map(f => pathToBatchId(f.getPath)) + + for (batchId <- batchIds if batchId > thresholdBatchId) { + val path = batchIdToPath(batchId) + fileManager.delete(path) + logTrace(s"Removed metadata log file: $path") + } + } + private def createFileManager(): FileManager = { val hadoopConf = sparkSession.sessionState.newHadoopConf() try { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetricsReporter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetricsReporter.scala index b84e6ce64c611..66b11ecddf233 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetricsReporter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetricsReporter.scala @@ -17,15 +17,11 @@ package org.apache.spark.sql.execution.streaming -import java.{util => ju} - -import scala.collection.mutable - import com.codahale.metrics.{Gauge, MetricRegistry} import org.apache.spark.internal.Logging import org.apache.spark.metrics.source.{Source => CodahaleSource} -import org.apache.spark.util.Clock +import org.apache.spark.sql.streaming.StreamingQueryProgress /** * Serves metrics from a [[org.apache.spark.sql.streaming.StreamingQuery]] to @@ -39,14 +35,17 @@ class MetricsReporter( // Metric names should not have . in them, so that all the metrics of a query are identified // together in Ganglia as a single metric group - registerGauge("inputRate-total", () => stream.lastProgress.inputRowsPerSecond) - registerGauge("processingRate-total", () => stream.lastProgress.processedRowsPerSecond) - registerGauge("latency", () => stream.lastProgress.durationMs.get("triggerExecution").longValue()) - - private def registerGauge[T](name: String, f: () => T)(implicit num: Numeric[T]): Unit = { + registerGauge("inputRate-total", _.inputRowsPerSecond, 0.0) + registerGauge("processingRate-total", _.processedRowsPerSecond, 0.0) + registerGauge("latency", _.durationMs.get("triggerExecution").longValue(), 0L) + + private def registerGauge[T]( + name: String, + f: StreamingQueryProgress => T, + default: T): Unit = { synchronized { metricRegistry.register(name, new Gauge[T] { - override def getValue: T = f() + override def getValue: T = Option(stream.lastProgress).map(f).getOrElse(default) }) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala new file mode 100644 index 0000000000000..42240eeb58d4b --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -0,0 +1,492 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming + +import java.util.Optional + +import scala.collection.JavaConverters._ +import scala.collection.mutable.{ArrayBuffer, Map => MutableMap} + +import org.apache.spark.sql.{Dataset, SparkSession} +import org.apache.spark.sql.catalyst.encoders.RowEncoder +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, CurrentBatchTimestamp, CurrentDate, CurrentTimestamp} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} +import org.apache.spark.sql.execution.SQLExecution +import org.apache.spark.sql.execution.datasources.v2.{StreamingDataSourceV2Relation, WriteToDataSourceV2} +import org.apache.spark.sql.sources.v2.DataSourceV2Options +import org.apache.spark.sql.sources.v2.streaming.{MicroBatchReadSupport, MicroBatchWriteSupport} +import org.apache.spark.sql.sources.v2.streaming.reader.{MicroBatchReader, Offset => OffsetV2} +import org.apache.spark.sql.streaming.{OutputMode, ProcessingTime, Trigger} +import org.apache.spark.util.{Clock, Utils} + +class MicroBatchExecution( + sparkSession: SparkSession, + name: String, + checkpointRoot: String, + analyzedPlan: LogicalPlan, + sink: BaseStreamingSink, + trigger: Trigger, + triggerClock: Clock, + outputMode: OutputMode, + extraOptions: Map[String, String], + deleteCheckpointOnStop: Boolean) + extends StreamExecution( + sparkSession, name, checkpointRoot, analyzedPlan, sink, + trigger, triggerClock, outputMode, deleteCheckpointOnStop) { + + @volatile protected var sources: Seq[BaseStreamingSource] = Seq.empty + + private val triggerExecutor = trigger match { + case t: ProcessingTime => ProcessingTimeExecutor(t, triggerClock) + case OneTimeTrigger => OneTimeExecutor() + case _ => throw new IllegalStateException(s"Unknown type of trigger: $trigger") + } + + override lazy val logicalPlan: LogicalPlan = { + assert(queryExecutionThread eq Thread.currentThread, + "logicalPlan must be initialized in QueryExecutionThread " + + s"but the current thread was ${Thread.currentThread}") + var nextSourceId = 0L + val toExecutionRelationMap = MutableMap[StreamingRelation, StreamingExecutionRelation]() + val v2ToExecutionRelationMap = MutableMap[StreamingRelationV2, StreamingExecutionRelation]() + // We transform each distinct streaming relation into a StreamingExecutionRelation, keeping a + // map as we go to ensure each identical relation gets the same StreamingExecutionRelation + // object. For each microbatch, the StreamingExecutionRelation will be replaced with a logical + // plan for the data within that batch. + // Note that we have to use the previous `output` as attributes in StreamingExecutionRelation, + // since the existing logical plan has already used those attributes. The per-microbatch + // transformation is responsible for replacing attributes with their final values. 
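// The caching idiom used in the transform below, reduced to a self-contained sketch:
// identical streaming relations must resolve to one shared execution relation, so a
// mutable map is consulted before creating anything new. Types here are illustrative.
import scala.collection.mutable

def dedupRelations[K, V](relations: Seq[K])(create: K => V): Seq[V] = {
  val cache = mutable.Map.empty[K, V]
  relations.map(r => cache.getOrElseUpdate(r, create(r))) // create at most once per key
}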
+ val _logicalPlan = analyzedPlan.transform { + case streamingRelation@StreamingRelation(dataSource, _, output) => + toExecutionRelationMap.getOrElseUpdate(streamingRelation, { + // Materialize source to avoid creating it in every batch + val metadataPath = s"$resolvedCheckpointRoot/sources/$nextSourceId" + val source = dataSource.createSource(metadataPath) + nextSourceId += 1 + StreamingExecutionRelation(source, output)(sparkSession) + }) + case s @ StreamingRelationV2(source: MicroBatchReadSupport, _, options, output, _) => + v2ToExecutionRelationMap.getOrElseUpdate(s, { + // Materialize source to avoid creating it in every batch + val metadataPath = s"$resolvedCheckpointRoot/sources/$nextSourceId" + val reader = source.createMicroBatchReader( + Optional.empty(), // user specified schema + metadataPath, + new DataSourceV2Options(options.asJava)) + nextSourceId += 1 + StreamingExecutionRelation(reader, output)(sparkSession) + }) + case s @ StreamingRelationV2(_, _, _, output, v1Relation) => + v2ToExecutionRelationMap.getOrElseUpdate(s, { + // Materialize source to avoid creating it in every batch + val metadataPath = s"$resolvedCheckpointRoot/sources/$nextSourceId" + assert(v1Relation.isDefined, "v2 execution didn't match but v1 was unavailable") + val source = v1Relation.get.dataSource.createSource(metadataPath) + nextSourceId += 1 + StreamingExecutionRelation(source, output)(sparkSession) + }) + } + sources = _logicalPlan.collect { case s: StreamingExecutionRelation => s.source } + uniqueSources = sources.distinct + _logicalPlan + } + + /** + * Repeatedly attempts to run batches as data arrives. + */ + protected def runActivatedStream(sparkSessionForStream: SparkSession): Unit = { + triggerExecutor.execute(() => { + startTrigger() + + if (isActive) { + reportTimeTaken("triggerExecution") { + if (currentBatchId < 0) { + // We'll do this initialization only once + populateStartOffsets(sparkSessionForStream) + sparkSession.sparkContext.setJobDescription(getBatchDescriptionString) + logDebug(s"Stream running from $committedOffsets to $availableOffsets") + } else { + constructNextBatch() + } + if (dataAvailable) { + currentStatus = currentStatus.copy(isDataAvailable = true) + updateStatusMessage("Processing new data") + runBatch(sparkSessionForStream) + } + } + // Report trigger as finished and construct progress object. + finishTrigger(dataAvailable) + if (dataAvailable) { + // Update committed offsets. + commitLog.add(currentBatchId) + committedOffsets ++= availableOffsets + logDebug(s"batch ${currentBatchId} committed") + // We'll increase currentBatchId after we complete processing current batch's data + currentBatchId += 1 + sparkSession.sparkContext.setJobDescription(getBatchDescriptionString) + } else { + currentStatus = currentStatus.copy(isDataAvailable = false) + updateStatusMessage("Waiting for data to arrive") + Thread.sleep(pollingDelayMs) + } + } + updateStatusMessage("Waiting for next trigger") + isActive + }) + } + + /** + * Populate the start offsets to start the execution at the current offsets stored in the sink + * (i.e. avoid reprocessing data that we have already processed). 
This function must be called + * before any processing occurs and will populate the following fields: + * - currentBatchId + * - committedOffsets + * - availableOffsets + * The basic structure of this method is as follows: + * + * Identify (from the offset log) the offsets used to run the last batch + * IF last batch exists THEN + * Set the next batch to be executed as the last recovered batch + * Check the commit log to see which batch was committed last + * IF the last batch was committed THEN + * Call getBatch using the last batch start and end offsets + * // ^^^^ above line is needed since some sources assume last batch always re-executes + * Set up for a new batch, i.e., start = last batch end, and identify new end + * DONE + * ELSE + * Identify a brand new batch + * DONE + */ + private def populateStartOffsets(sparkSessionToRunBatches: SparkSession): Unit = { + offsetLog.getLatest() match { + case Some((latestBatchId, nextOffsets)) => + /* First assume that we are re-executing the latest known batch + * in the offset log */ + currentBatchId = latestBatchId + availableOffsets = nextOffsets.toStreamProgress(sources) + /* Initialize committed offsets to a committed batch, which at this point + * is the second latest batch id in the offset log. */ + if (latestBatchId != 0) { + val secondLatestBatchId = offsetLog.get(latestBatchId - 1).getOrElse { + throw new IllegalStateException(s"batch ${latestBatchId - 1} doesn't exist") + } + committedOffsets = secondLatestBatchId.toStreamProgress(sources) + } + + // update offset metadata + nextOffsets.metadata.foreach { metadata => + OffsetSeqMetadata.setSessionConf(metadata, sparkSessionToRunBatches.conf) + offsetSeqMetadata = OffsetSeqMetadata( + metadata.batchWatermarkMs, metadata.batchTimestampMs, sparkSessionToRunBatches.conf) + } + + /* identify the current batch id: if commit log indicates we successfully processed the + * latest batch id in the offset log, then we can safely move to the next batch + * i.e., committedBatchId + 1 */ + commitLog.getLatest() match { + case Some((latestCommittedBatchId, _)) => + if (latestBatchId == latestCommittedBatchId) { + /* The last batch was successfully committed, so we can safely process the + * next batch, but first: + * Make a call to getBatch using the offsets from the previous batch, + * because certain sources (e.g., KafkaSource) assume on restart the last + * batch will be executed before getOffset is called again. */ + availableOffsets.foreach { + case (source: Source, end: Offset) => + if (committedOffsets.get(source).map(_ != end).getOrElse(true)) { + val start = committedOffsets.get(source) + source.getBatch(start, end) + } + case nonV1Tuple => + // The V2 API does not have the same edge case requiring getBatch to be called + // here, so we do nothing here. + } + currentBatchId = latestCommittedBatchId + 1 + committedOffsets ++= availableOffsets + // Construct a new batch by recomputing availableOffsets + constructNextBatch() + } else if (latestCommittedBatchId < latestBatchId - 1) { + logWarning(s"Batch completion log latest batch id is " + + s"${latestCommittedBatchId}, which is not trailing " + + s"batch id $latestBatchId by one") + } + case None => logInfo("no commit log present") + } + logDebug(s"Resuming at batch $currentBatchId with committed offsets " + + s"$committedOffsets and available offsets $availableOffsets") + case None => // We are starting this stream for the first time. 
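// The recovery decision encoded by this match, as a self-contained sketch: a pure
// function over the latest offset-log and commit-log batch ids. Names are invented
// for illustration and simplify away the offset bookkeeping done above.
def batchToRun(latestOffset: Option[Long], latestCommit: Option[Long]): Long =
  (latestOffset, latestCommit) match {
    case (Some(o), Some(c)) if o == c => o + 1 // last batch fully committed: start the next
    case (Some(o), _) => o // re-run the last, uncommitted batch
    case (None, _) => 0 // no offset log yet: brand new query
  }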
+ logInfo(s"Starting new streaming query.") + currentBatchId = 0 + constructNextBatch() + } + } + + /** + * Returns true if there is any new data available to be processed. + */ + private def dataAvailable: Boolean = { + availableOffsets.exists { + case (source, available) => + committedOffsets + .get(source) + .map(committed => committed != available) + .getOrElse(true) + } + } + + /** + * Queries all of the sources to see if any new data is available. When there is new data the + * batchId counter is incremented and a new log entry is written with the newest offsets. + */ + private def constructNextBatch(): Unit = { + // Check to see what new data is available. + val hasNewData = { + awaitProgressLock.lock() + try { + // Generate a map from each unique source to the next available offset. + val latestOffsets: Map[BaseStreamingSource, Option[Offset]] = uniqueSources.map { + case s: Source => + updateStatusMessage(s"Getting offsets from $s") + reportTimeTaken("getOffset") { + (s, s.getOffset) + } + case s: MicroBatchReader => + updateStatusMessage(s"Getting offsets from $s") + reportTimeTaken("getOffset") { + // Once v1 streaming source execution is gone, we can refactor this away. + // For now, we set the range here to get the source to infer the available end offset, + // get that offset, and then set the range again when we later execute. + s.setOffsetRange( + toJava(availableOffsets.get(s).map(off => s.deserializeOffset(off.json))), + Optional.empty()) + + (s, Some(s.getEndOffset)) + } + }.toMap + availableOffsets ++= latestOffsets.filter { case (_, o) => o.nonEmpty }.mapValues(_.get) + + if (dataAvailable) { + true + } else { + noNewData = true + false + } + } finally { + awaitProgressLock.unlock() + } + } + if (hasNewData) { + var batchWatermarkMs = offsetSeqMetadata.batchWatermarkMs + // Update the eventTime watermarks if we find any in the plan. + if (lastExecution != null) { + lastExecution.executedPlan.collect { + case e: EventTimeWatermarkExec => e + }.zipWithIndex.foreach { + case (e, index) if e.eventTimeStats.value.count > 0 => + logDebug(s"Observed event time stats $index: ${e.eventTimeStats.value}") + val newWatermarkMs = e.eventTimeStats.value.max - e.delayMs + val prevWatermarkMs = watermarkMsMap.get(index) + if (prevWatermarkMs.isEmpty || newWatermarkMs > prevWatermarkMs.get) { + watermarkMsMap.put(index, newWatermarkMs) + } + + // Populate 0 if we haven't seen any data yet for this watermark node. + case (_, index) => + if (!watermarkMsMap.isDefinedAt(index)) { + watermarkMsMap.put(index, 0) + } + } + + // Update the global watermark to the minimum of all watermark nodes. + // This is the safest option, because only the global watermark is fault-tolerant. Making + // it the minimum of all individual watermarks guarantees it will never advance past where + // any individual watermark operator would be if it were in a plan by itself. 
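// The watermark policy described above, in miniature: take the minimum across all
// watermark operators and never let the global value move backwards. For example,
// operators at 10:05 and 10:02 yield a global watermark of 10:02. A hedged sketch;
// the name and signature are illustrative only.
def nextGlobalWatermark(perOperatorMs: Iterable[Long], currentMs: Long): Long =
  if (perOperatorMs.isEmpty) currentMs else math.max(currentMs, perOperatorMs.min)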
+ if(!watermarkMsMap.isEmpty) { + val newWatermarkMs = watermarkMsMap.minBy(_._2)._2 + if (newWatermarkMs > batchWatermarkMs) { + logInfo(s"Updating eventTime watermark to: $newWatermarkMs ms") + batchWatermarkMs = newWatermarkMs + } else { + logDebug( + s"Event time didn't move: $newWatermarkMs < " + + s"$batchWatermarkMs") + } + } + } + offsetSeqMetadata = offsetSeqMetadata.copy( + batchWatermarkMs = batchWatermarkMs, + batchTimestampMs = triggerClock.getTimeMillis()) // Current batch timestamp in milliseconds + + updateStatusMessage("Writing offsets to log") + reportTimeTaken("walCommit") { + assert(offsetLog.add( + currentBatchId, + availableOffsets.toOffsetSeq(sources, offsetSeqMetadata)), + s"Concurrent update to the log. Multiple streaming jobs detected for $currentBatchId") + logInfo(s"Committed offsets for batch $currentBatchId. " + + s"Metadata ${offsetSeqMetadata.toString}") + + // NOTE: The following code is correct because runStream() processes exactly one + // batch at a time. If we add pipeline parallelism (multiple batches in flight at + // the same time), this cleanup logic will need to change. + + // Now that we've updated the scheduler's persistent checkpoint, it is safe for the + // sources to discard data from the previous batch. + if (currentBatchId != 0) { + val prevBatchOff = offsetLog.get(currentBatchId - 1) + if (prevBatchOff.isDefined) { + prevBatchOff.get.toStreamProgress(sources).foreach { + case (src: Source, off) => src.commit(off) + case (reader: MicroBatchReader, off) => + reader.commit(reader.deserializeOffset(off.json)) + } + } else { + throw new IllegalStateException(s"batch $currentBatchId doesn't exist") + } + } + + // It is now safe to discard the metadata beyond the minimum number to retain. + // Note that purge is exclusive, i.e. it purges everything before the target ID. + if (minLogEntriesToMaintain < currentBatchId) { + offsetLog.purge(currentBatchId - minLogEntriesToMaintain) + commitLog.purge(currentBatchId - minLogEntriesToMaintain) + } + } + } else { + awaitProgressLock.lock() + try { + // Wake up any threads that are waiting for the stream to progress. + awaitProgressLockCondition.signalAll() + } finally { + awaitProgressLock.unlock() + } + } + } + + /** + * Processes any data available between `availableOffsets` and `committedOffsets`. + * @param sparkSessionToRunBatch Isolated [[SparkSession]] to run this batch with. + */ + private def runBatch(sparkSessionToRunBatch: SparkSession): Unit = { + // Request unprocessed data from all sources. 
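// The freshness test applied per source below, as a one-line sketch: a source has
// unprocessed data when its available offset differs from the committed one, or when
// nothing has been committed for it yet. Generic types stand in for the real ones.
def hasUnprocessedData[S, O](source: S, available: O, committed: Map[S, O]): Boolean =
  committed.get(source).map(_ != available).getOrElse(true)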
+ newData = reportTimeTaken("getBatch") { + availableOffsets.flatMap { + case (source: Source, available) + if committedOffsets.get(source).map(_ != available).getOrElse(true) => + val current = committedOffsets.get(source) + val batch = source.getBatch(current, available) + assert(batch.isStreaming, + s"DataFrame returned by getBatch from $source did not have isStreaming=true\n" + + s"${batch.queryExecution.logical}") + logDebug(s"Retrieving data from $source: $current -> $available") + Some(source -> batch.logicalPlan) + case (reader: MicroBatchReader, available) + if committedOffsets.get(reader).map(_ != available).getOrElse(true) => + val current = committedOffsets.get(reader).map(off => reader.deserializeOffset(off.json)) + reader.setOffsetRange( + toJava(current), + Optional.of(available.asInstanceOf[OffsetV2])) + logDebug(s"Retrieving data from $reader: $current -> $available") + Some(reader -> + new StreamingDataSourceV2Relation(reader.readSchema().toAttributes, reader)) + case _ => None + } + } + + // A list of attributes that will need to be updated. + val replacements = new ArrayBuffer[(Attribute, Attribute)] + // Replace sources in the logical plan with data that has arrived since the last batch. + val newBatchesPlan = logicalPlan transform { + case StreamingExecutionRelation(source, output) => + newData.get(source).map { dataPlan => + assert(output.size == dataPlan.output.size, + s"Invalid batch: ${Utils.truncatedString(output, ",")} != " + + s"${Utils.truncatedString(dataPlan.output, ",")}") + replacements ++= output.zip(dataPlan.output) + dataPlan + }.getOrElse { + LocalRelation(output, isStreaming = true) + } + } + + // Rewire the plan to use the new attributes that were returned by the source. + val replacementMap = AttributeMap(replacements) + val newAttributePlan = newBatchesPlan transformAllExpressions { + case a: Attribute if replacementMap.contains(a) => + replacementMap(a).withMetadata(a.metadata) + case ct: CurrentTimestamp => + CurrentBatchTimestamp(offsetSeqMetadata.batchTimestampMs, + ct.dataType) + case cd: CurrentDate => + CurrentBatchTimestamp(offsetSeqMetadata.batchTimestampMs, + cd.dataType, cd.timeZoneId) + } + + val triggerLogicalPlan = sink match { + case _: Sink => newAttributePlan + case s: MicroBatchWriteSupport => + val writer = s.createMicroBatchWriter( + s"$runId", + currentBatchId, + newAttributePlan.schema, + outputMode, + new DataSourceV2Options(extraOptions.asJava)) + assert(writer.isPresent, "microbatch writer must always be present") + WriteToDataSourceV2(writer.get, newAttributePlan) + case _ => throw new IllegalArgumentException(s"unknown sink type for $sink") + } + + reportTimeTaken("queryPlanning") { + lastExecution = new IncrementalExecution( + sparkSessionToRunBatch, + triggerLogicalPlan, + outputMode, + checkpointFile("state"), + runId, + currentBatchId, + offsetSeqMetadata) + lastExecution.executedPlan // Force the lazy generation of execution plan + } + + val nextBatch = + new Dataset(sparkSessionToRunBatch, lastExecution, RowEncoder(lastExecution.analyzed.schema)) + + reportTimeTaken("addBatch") { + SQLExecution.withNewExecutionId(sparkSessionToRunBatch, lastExecution) { + sink match { + case s: Sink => s.addBatch(currentBatchId, nextBatch) + case s: MicroBatchWriteSupport => + // This doesn't accumulate any data - it just forces execution of the microbatch writer. + nextBatch.collect() + } + } + } + + awaitProgressLock.lock() + try { + // Wake up any threads that are waiting for the stream to progress. 
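// The wake-up idiom used here, in a generic, self-contained form: progress waiters
// block on a condition tied to a lock, and each completed batch signals them all.
// Field and class names are illustrative, not this class's actual members.
import java.util.concurrent.locks.ReentrantLock

class ProgressSignal {
  private val lock = new ReentrantLock()
  private val progressed = lock.newCondition()
  def await(): Unit = { lock.lock(); try progressed.await() finally lock.unlock() }
  def signal(): Unit = { lock.lock(); try progressed.signalAll() finally lock.unlock() }
}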
+ awaitProgressLockCondition.signalAll() + } finally { + awaitProgressLock.unlock() + } + } + + private def toJava(scalaOption: Option[OffsetV2]): Optional[OffsetV2] = { + Optional.ofNullable(scalaOption.orNull) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Offset.java b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Offset.java new file mode 100644 index 0000000000000..80aa5505db991 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Offset.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming; + +/** + * This is an internal, deprecated interface. New source implementations should use the + * org.apache.spark.sql.sources.v2.reader.Offset class, which is the one that will be supported + * in the long term. + * + * This class will be removed in a future release. + */ +public abstract class Offset { + /** + * A JSON-serialized representation of an Offset that is + * used for saving offsets to the offset log. + * Note: We assume that equivalent/equal offsets serialize to + * identical JSON strings. + * + * @return JSON string encoding + */ + public abstract String json(); + + /** + * Equality based on JSON string representation. We leverage the + * JSON representation for normalization between the Offset's + * in memory and on disk representations. + */ + @Override + public boolean equals(Object obj) { + if (obj instanceof Offset) { + return this.json().equals(((Offset) obj).json()); + } else { + return false; + } + } + + @Override + public int hashCode() { + return this.json().hashCode(); + } + + @Override + public String toString() { + return this.json(); + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala index 4e0a468b962a2..a1b63a6de3823 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala @@ -38,7 +38,7 @@ case class OffsetSeq(offsets: Seq[Option[Offset]], metadata: Option[OffsetSeqMet * This method is typically used to associate a serialized offset with actual sources (which * cannot be serialized). 
*/ - def toStreamProgress(sources: Seq[Source]): StreamProgress = { + def toStreamProgress(sources: Seq[BaseStreamingSource]): StreamProgress = { assert(sources.size == offsets.size) new StreamProgress ++ sources.zip(offsets).collect { case (s, Some(o)) => (s, o) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala index b1c3a8ab235ab..d1e5be9c12762 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala @@ -42,7 +42,7 @@ import org.apache.spark.util.Clock trait ProgressReporter extends Logging { case class ExecutionStats( - inputRows: Map[Source, Long], + inputRows: Map[BaseStreamingSource, Long], stateOperators: Seq[StateOperatorProgress], eventTimeStats: Map[String, String]) @@ -53,11 +53,11 @@ trait ProgressReporter extends Logging { protected def triggerClock: Clock protected def logicalPlan: LogicalPlan protected def lastExecution: QueryExecution - protected def newData: Map[Source, DataFrame] + protected def newData: Map[BaseStreamingSource, LogicalPlan] protected def availableOffsets: StreamProgress protected def committedOffsets: StreamProgress - protected def sources: Seq[Source] - protected def sink: Sink + protected def sources: Seq[BaseStreamingSource] + protected def sink: BaseStreamingSink protected def offsetSeqMetadata: OffsetSeqMetadata protected def currentBatchId: Long protected def sparkSession: SparkSession @@ -225,12 +225,12 @@ trait ProgressReporter extends Logging { // // 3. For each source, we sum the metrics of the associated execution plan leaves. 
// - val logicalPlanLeafToSource = newData.flatMap { case (source, df) => - df.logicalPlan.collectLeaves().map { leaf => leaf -> source } + val logicalPlanLeafToSource = newData.flatMap { case (source, logicalPlan) => + logicalPlan.collectLeaves().map { leaf => leaf -> source } } val allLogicalPlanLeaves = lastExecution.logical.collectLeaves() // includes non-streaming val allExecPlanLeaves = lastExecution.executedPlan.collectLeaves() - val numInputRows: Map[Source, Long] = + val numInputRows: Map[BaseStreamingSource, Long] = if (allLogicalPlanLeaves.size == allExecPlanLeaves.size) { val execLeafToSource = allLogicalPlanLeaves.zip(allExecPlanLeaves).flatMap { case (lp, ep) => logicalPlanLeafToSource.get(lp).map { source => ep -> source } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateSourceProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateSourceProvider.scala index 077a4778e34a8..66eb0169ac1ec 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateSourceProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateSourceProvider.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.streaming import java.io._ import java.nio.charset.StandardCharsets +import java.util.Optional import java.util.concurrent.TimeUnit import org.apache.commons.io.IOUtils @@ -28,7 +29,12 @@ import org.apache.spark.network.util.JavaUtils import org.apache.spark.sql.{AnalysisException, DataFrame, SQLContext} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} +import org.apache.spark.sql.execution.streaming.continuous.RateStreamContinuousReader +import org.apache.spark.sql.execution.streaming.sources.RateStreamMicroBatchReader import org.apache.spark.sql.sources.{DataSourceRegister, StreamSourceProvider} +import org.apache.spark.sql.sources.v2._ +import org.apache.spark.sql.sources.v2.streaming.{ContinuousReadSupport, MicroBatchReadSupport} +import org.apache.spark.sql.sources.v2.streaming.reader.{ContinuousReader, MicroBatchReader} import org.apache.spark.sql.types._ import org.apache.spark.util.{ManualClock, SystemClock} @@ -46,7 +52,8 @@ import org.apache.spark.util.{ManualClock, SystemClock} * generated rows. The source will try its best to reach `rowsPerSecond`, but the query may * be resource constrained, and `numPartitions` can be tweaked to help reach the desired speed. 
*/ -class RateSourceProvider extends StreamSourceProvider with DataSourceRegister { +class RateSourceProvider extends StreamSourceProvider with DataSourceRegister + with DataSourceV2 with ContinuousReadSupport { override def sourceSchema( sqlContext: SQLContext, @@ -100,6 +107,14 @@ class RateSourceProvider extends StreamSourceProvider with DataSourceRegister { params.get("useManualClock").map(_.toBoolean).getOrElse(false) // Only for testing ) } + + override def createContinuousReader( + schema: Optional[StructType], + checkpointLocation: String, + options: DataSourceV2Options): ContinuousReader = { + new RateStreamContinuousReader(options) + } + override def shortName(): String = "rate" } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateStreamOffset.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateStreamOffset.scala new file mode 100644 index 0000000000000..261d69bbd9843 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateStreamOffset.scala @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming + +import org.json4s.DefaultFormats +import org.json4s.jackson.Serialization + +import org.apache.spark.sql.sources.v2 + +case class RateStreamOffset(partitionToValueAndRunTimeMs: Map[Int, ValueRunTimeMsPair]) + extends v2.streaming.reader.Offset { + implicit val defaultFormats: DefaultFormats = DefaultFormats + override val json = Serialization.write(partitionToValueAndRunTimeMs) +} + + +case class ValueRunTimeMsPair(value: Long, runTimeMs: Long) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Offset.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/SerializedOffset.scala similarity index 55% rename from sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Offset.scala rename to sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/SerializedOffset.scala index 4efcee0f8f9d6..129cfed860eb6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Offset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/SerializedOffset.scala @@ -17,39 +17,6 @@ package org.apache.spark.sql.execution.streaming -/** - * An offset is a monotonically increasing metric used to track progress in the computation of a - * stream. Since offsets are retrieved from a [[Source]] by a single thread, we know the global - * ordering of two [[Offset]] instances. We do assume that if two offsets are `equal` then no - * new data has arrived. - */ -abstract class Offset { - - /** - * Equality based on JSON string representation. 
We leverage the - * JSON representation for normalization between the Offset's - * in memory and on disk representations. - */ - override def equals(obj: Any): Boolean = obj match { - case o: Offset => this.json == o.json - case _ => false - } - - override def hashCode(): Int = this.json.hashCode - - override def toString(): String = this.json.toString - - /** - * A JSON-serialized representation of an Offset that is - * used for saving offsets to the offset log. - * Note: We assume that equivalent/equal offsets serialize to - * identical JSON strings. - * - * @return JSON string encoding - */ - def json: String -} - /** * Used when loading a JSON serialized offset from external storage. * We are currently not responsible for converting JSON serialized @@ -58,3 +25,5 @@ abstract class Offset { * that accepts a [[SerializedOffset]] for doing the conversion. */ case class SerializedOffset(override val json: String) extends Offset + + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala index d10cd3044ecdf..34bc085d920c1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.DataFrame * exactly once semantics a sink must be idempotent in the face of multiple attempts to add the same * batch. */ -trait Sink { +trait Sink extends BaseStreamingSink { /** * Adds a batch of data to this sink. The data for a given `batchId` is deterministic and if diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala index 311942f6dbd84..dbbd59e06909c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.types.StructType * monotonically increasing notion of progress that can be represented as an [[Offset]]. Spark * will regularly query each [[Source]] to see if any more data is available. 
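The JSON-based equality contract of the relocated `Offset` class, which `SerializedOffset` depends on, can be demonstrated in isolation. A toy sketch under stand-in names (`JsonOffset`, `ToyLongOffset`, and `ToyRawOffset` are not Spark classes); note that Scala suppresses the synthetic case-class `equals`/`hashCode` because the base class already supplies concrete ones:

abstract class JsonOffset {
  def json: String
  // Equality is defined purely on the JSON form, so a typed offset and the
  // raw offset reloaded from the log compare equal.
  override def equals(obj: Any): Boolean = obj match {
    case o: JsonOffset => this.json == o.json
    case _ => false
  }
  override def hashCode(): Int = json.hashCode
}

case class ToyLongOffset(value: Long) extends JsonOffset {
  override def json: String = value.toString
}

// Plays the role of SerializedOffset: JSON loaded from storage, type unknown.
case class ToyRawOffset(json: String) extends JsonOffset

object OffsetEqualityDemo extends App {
  assert(ToyLongOffset(5) == ToyRawOffset("5"))
  assert(ToyLongOffset(5).hashCode == ToyRawOffset("5").hashCode)
}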
*/ -trait Source { +trait Source extends BaseStreamingSource { /** Returns the schema of the data from this source */ def schema: StructType diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index 406560c260f07..24a8b000df0c1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -22,10 +22,9 @@ import java.nio.channels.ClosedByInterruptException import java.util.UUID import java.util.concurrent.{CountDownLatch, ExecutionException, TimeUnit} import java.util.concurrent.atomic.AtomicReference -import java.util.concurrent.locks.ReentrantLock +import java.util.concurrent.locks.{Condition, ReentrantLock} import scala.collection.mutable.{Map => MutableMap} -import scala.collection.mutable.ArrayBuffer import scala.util.control.NonFatal import com.google.common.util.concurrent.UncheckedExecutionException @@ -33,10 +32,8 @@ import org.apache.hadoop.fs.Path import org.apache.spark.internal.Logging import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, CurrentBatchTimestamp, CurrentDate, CurrentTimestamp} -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} -import org.apache.spark.sql.execution.{QueryExecution, SQLExecution} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.command.StreamingExplainCommand import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming._ @@ -47,6 +44,7 @@ trait State case object INITIALIZING extends State case object ACTIVE extends State case object TERMINATED extends State +case object RECONFIGURING extends State /** * Manages the execution of a streaming Spark SQL query that is occurring in a separate thread. @@ -57,12 +55,12 @@ case object TERMINATED extends State * @param deleteCheckpointOnStop whether to delete the checkpoint if the query is stopped without * errors */ -class StreamExecution( +abstract class StreamExecution( override val sparkSession: SparkSession, override val name: String, private val checkpointRoot: String, analyzedPlan: LogicalPlan, - val sink: Sink, + val sink: BaseStreamingSink, val trigger: Trigger, val triggerClock: Clock, val outputMode: OutputMode, @@ -71,16 +69,16 @@ class StreamExecution( import org.apache.spark.sql.streaming.StreamingQueryListener._ - private val pollingDelayMs = sparkSession.sessionState.conf.streamingPollingDelay + protected val pollingDelayMs: Long = sparkSession.sessionState.conf.streamingPollingDelay - private val minBatchesToRetain = sparkSession.sessionState.conf.minBatchesToRetain - require(minBatchesToRetain > 0, "minBatchesToRetain has to be positive") + protected val minLogEntriesToMaintain: Int = sparkSession.sessionState.conf.minBatchesToRetain + require(minLogEntriesToMaintain > 0, "minBatchesToRetain has to be positive") /** * A lock used to wait/notify when batches complete. Use a fair lock to avoid thread starvation. 
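The renamed `awaitProgressLock`/`awaitProgressLockCondition` pair is a standard fair-lock wait/notify gate. A self-contained sketch of the pattern (`ProgressGate` is an illustrative name; the timed wait mirrors the polling style the await methods below use so waiters can also observe failures):

import java.util.concurrent.TimeUnit
import java.util.concurrent.locks.ReentrantLock

class ProgressGate {
  private val lock = new ReentrantLock(true) // fair, to avoid starving waiters
  private val progressed = lock.newCondition()
  @volatile private var lastBatchId = -1L

  // Called by the stream thread whenever a batch (or epoch) completes.
  def signalProgress(batchId: Long): Unit = {
    lock.lock()
    try {
      lastBatchId = batchId
      progressed.signalAll()
    } finally lock.unlock()
  }

  // Called by observer threads; polls with a timeout rather than waiting
  // forever, so the waiter can also check external failure conditions.
  def awaitBatch(target: Long): Unit = {
    while (lastBatchId < target) {
      lock.lock()
      try progressed.await(100, TimeUnit.MILLISECONDS)
      finally lock.unlock()
    }
  }
}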
*/ - private val awaitBatchLock = new ReentrantLock(true) - private val awaitBatchLockCondition = awaitBatchLock.newCondition() + protected val awaitProgressLock = new ReentrantLock(true) + protected val awaitProgressLockCondition = awaitProgressLock.newCondition() private val initializationLatch = new CountDownLatch(1) private val startLatch = new CountDownLatch(1) @@ -89,9 +87,11 @@ class StreamExecution( val resolvedCheckpointRoot = { val checkpointPath = new Path(checkpointRoot) val fs = checkpointPath.getFileSystem(sparkSession.sessionState.newHadoopConf()) - checkpointPath.makeQualified(fs.getUri(), fs.getWorkingDirectory()).toUri.toString + checkpointPath.makeQualified(fs.getUri, fs.getWorkingDirectory).toUri.toString } + def logicalPlan: LogicalPlan + /** * Tracks how much data we have processed and committed to the sink or state store from each * input source. @@ -148,59 +148,25 @@ class StreamExecution( * Pretty identified string of printing in logs. Format is * If name is set "queryName [id = xyz, runId = abc]" else "[id = xyz, runId = abc]" */ - private val prettyIdString = + protected val prettyIdString = Option(name).map(_ + " ").getOrElse("") + s"[id = $id, runId = $runId]" - /** - * All stream sources present in the query plan. This will be set when generating logical plan. - */ - @volatile protected var sources: Seq[Source] = Seq.empty - /** * A list of unique sources in the query plan. This will be set when generating logical plan. */ - @volatile private var uniqueSources: Seq[Source] = Seq.empty - - override lazy val logicalPlan: LogicalPlan = { - assert(microBatchThread eq Thread.currentThread, - "logicalPlan must be initialized in StreamExecutionThread " + - s"but the current thread was ${Thread.currentThread}") - var nextSourceId = 0L - val toExecutionRelationMap = MutableMap[StreamingRelation, StreamingExecutionRelation]() - val _logicalPlan = analyzedPlan.transform { - case streamingRelation@StreamingRelation(dataSource, _, output) => - toExecutionRelationMap.getOrElseUpdate(streamingRelation, { - // Materialize source to avoid creating it in every batch - val metadataPath = s"$resolvedCheckpointRoot/sources/$nextSourceId" - val source = dataSource.createSource(metadataPath) - nextSourceId += 1 - // We still need to use the previous `output` instead of `source.schema` as attributes in - // "df.logicalPlan" has already used attributes of the previous `output`. - StreamingExecutionRelation(source, output)(sparkSession) - }) - } - sources = _logicalPlan.collect { case s: StreamingExecutionRelation => s.source } - uniqueSources = sources.distinct - _logicalPlan - } - - private val triggerExecutor = trigger match { - case t: ProcessingTime => ProcessingTimeExecutor(t, triggerClock) - case OneTimeTrigger => OneTimeExecutor() - case _ => throw new IllegalStateException(s"Unknown type of trigger: $trigger") - } + @volatile protected var uniqueSources: Seq[BaseStreamingSource] = Seq.empty /** Defines the internal state of execution */ - private val state = new AtomicReference[State](INITIALIZING) + protected val state = new AtomicReference[State](INITIALIZING) @volatile var lastExecution: IncrementalExecution = _ /** Holds the most recent input data for each source. 
*/ - protected var newData: Map[Source, DataFrame] = _ + protected var newData: Map[BaseStreamingSource, LogicalPlan] = _ @volatile - private var streamDeathCause: StreamingQueryException = null + protected var streamDeathCause: StreamingQueryException = null /* Get the call site in the caller thread; will pass this into the micro batch thread */ private val callSite = Utils.getCallSite() @@ -214,13 +180,13 @@ class StreamExecution( * [[org.apache.spark.util.UninterruptibleThread]] to workaround KAFKA-1894: interrupting a * running `KafkaConsumer` may cause endless loop. */ - val microBatchThread = - new StreamExecutionThread(s"stream execution thread for $prettyIdString") { + val queryExecutionThread: QueryExecutionThread = + new QueryExecutionThread(s"stream execution thread for $prettyIdString") { override def run(): Unit = { // To fix call site like "run at :0", we bridge the call site from the caller // thread to this micro batch thread sparkSession.sparkContext.setCallSite(callSite) - runBatches() + runStream() } } @@ -237,7 +203,7 @@ class StreamExecution( * fully processed, and its output was committed to the sink, hence no need to process it again. * This is used (for instance) during restart, to help identify which batch to run next. */ - val batchCommitLog = new BatchCommitLog(sparkSession, checkpointFile("commits")) + val commitLog = new CommitLog(sparkSession, checkpointFile("commits")) /** Whether all fields of the query have been initialized */ private def isInitialized: Boolean = state.get != INITIALIZING @@ -249,7 +215,7 @@ class StreamExecution( override def exception: Option[StreamingQueryException] = Option(streamDeathCause) /** Returns the path of a file with `name` in the checkpoint directory. */ - private def checkpointFile(name: String): String = + protected def checkpointFile(name: String): String = new Path(new Path(resolvedCheckpointRoot), name).toUri.toString /** @@ -258,20 +224,25 @@ class StreamExecution( */ def start(): Unit = { logInfo(s"Starting $prettyIdString. Use $resolvedCheckpointRoot to store the query checkpoint.") - microBatchThread.setDaemon(true) - microBatchThread.start() + queryExecutionThread.setDaemon(true) + queryExecutionThread.start() startLatch.await() // Wait until thread started and QueryStart event has been posted } /** - * Repeatedly attempts to run batches as data arrives. + * Run the activated stream until stopped. + */ + protected def runActivatedStream(sparkSessionForStream: SparkSession): Unit + + /** + * Activate the stream and then wrap a callout to runActivatedStream, handling start and stop. * * Note that this method ensures that [[QueryStartedEvent]] and [[QueryTerminatedEvent]] are * posted such that listeners are guaranteed to get a start event before a termination. * Furthermore, this method also ensures that [[QueryStartedEvent]] event is posted before the * `start()` method returns. */ - private def runBatches(): Unit = { + private def runStream(): Unit = { try { sparkSession.sparkContext.setJobGroup(runId.toString, getBatchDescriptionString, interruptOnCancel = true) @@ -294,56 +265,18 @@ class StreamExecution( logicalPlan // Isolated spark session to run the batches with. 
- val sparkSessionToRunBatches = sparkSession.cloneSession() + val sparkSessionForStream = sparkSession.cloneSession() // Adaptive execution can change num shuffle partitions, disallow - sparkSessionToRunBatches.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "false") + sparkSessionForStream.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "false") // Disable cost-based join optimization as we do not want stateful operations to be rearranged - sparkSessionToRunBatches.conf.set(SQLConf.CBO_ENABLED.key, "false") + sparkSessionForStream.conf.set(SQLConf.CBO_ENABLED.key, "false") offsetSeqMetadata = OffsetSeqMetadata( - batchWatermarkMs = 0, batchTimestampMs = 0, sparkSessionToRunBatches.conf) + batchWatermarkMs = 0, batchTimestampMs = 0, sparkSessionForStream.conf) if (state.compareAndSet(INITIALIZING, ACTIVE)) { // Unblock `awaitInitialization` initializationLatch.countDown() - - triggerExecutor.execute(() => { - startTrigger() - - if (isActive) { - reportTimeTaken("triggerExecution") { - if (currentBatchId < 0) { - // We'll do this initialization only once - populateStartOffsets(sparkSessionToRunBatches) - sparkSession.sparkContext.setJobDescription(getBatchDescriptionString) - logDebug(s"Stream running from $committedOffsets to $availableOffsets") - } else { - constructNextBatch() - } - if (dataAvailable) { - currentStatus = currentStatus.copy(isDataAvailable = true) - updateStatusMessage("Processing new data") - runBatch(sparkSessionToRunBatches) - } - } - // Report trigger as finished and construct progress object. - finishTrigger(dataAvailable) - if (dataAvailable) { - // Update committed offsets. - batchCommitLog.add(currentBatchId) - committedOffsets ++= availableOffsets - logDebug(s"batch ${currentBatchId} committed") - // We'll increase currentBatchId after we complete processing current batch's data - currentBatchId += 1 - sparkSession.sparkContext.setJobDescription(getBatchDescriptionString) - } else { - currentStatus = currentStatus.copy(isDataAvailable = false) - updateStatusMessage("Waiting for data to arrive") - Thread.sleep(pollingDelayMs) - } - } - updateStatusMessage("Waiting for next trigger") - isActive - }) + runActivatedStream(sparkSessionForStream) updateStatusMessage("Stopped") } else { // `stop()` is already called. Let `finally` finish the cleanup. @@ -372,7 +305,7 @@ class StreamExecution( if (!NonFatal(e)) { throw e } - } finally microBatchThread.runUninterruptibly { + } finally queryExecutionThread.runUninterruptibly { // The whole `finally` block must run inside `runUninterruptibly` to avoid being interrupted // when a query is stopped by the user. We need to make sure the following codes finish // otherwise it may throw `InterruptedException` to `UncaughtExceptionHandler` (SPARK-21248). @@ -409,12 +342,12 @@ class StreamExecution( } } } finally { - awaitBatchLock.lock() + awaitProgressLock.lock() try { // Wake up any threads that are waiting for the stream to progress. - awaitBatchLockCondition.signalAll() + awaitProgressLockCondition.signalAll() } finally { - awaitBatchLock.unlock() + awaitProgressLock.unlock() } terminationLatch.countDown() } @@ -447,302 +380,12 @@ class StreamExecution( } } - /** - * Populate the start offsets to start the execution at the current offsets stored in the sink - * (i.e. avoid reprocessing data that we have already processed). 
This function must be called - * before any processing occurs and will populate the following fields: - * - currentBatchId - * - committedOffsets - * - availableOffsets - * The basic structure of this method is as follows: - * - * Identify (from the offset log) the offsets used to run the last batch - * IF last batch exists THEN - * Set the next batch to be executed as the last recovered batch - * Check the commit log to see which batch was committed last - * IF the last batch was committed THEN - * Call getBatch using the last batch start and end offsets - * // ^^^^ above line is needed since some sources assume last batch always re-executes - * Setup for a new batch i.e., start = last batch end, and identify new end - * DONE - * ELSE - * Identify a brand new batch - * DONE - */ - private def populateStartOffsets(sparkSessionToRunBatches: SparkSession): Unit = { - offsetLog.getLatest() match { - case Some((latestBatchId, nextOffsets)) => - /* First assume that we are re-executing the latest known batch - * in the offset log */ - currentBatchId = latestBatchId - availableOffsets = nextOffsets.toStreamProgress(sources) - /* Initialize committed offsets to a committed batch, which at this - * is the second latest batch id in the offset log. */ - if (latestBatchId != 0) { - val secondLatestBatchId = offsetLog.get(latestBatchId - 1).getOrElse { - throw new IllegalStateException(s"batch ${latestBatchId - 1} doesn't exist") - } - committedOffsets = secondLatestBatchId.toStreamProgress(sources) - } - - // update offset metadata - nextOffsets.metadata.foreach { metadata => - OffsetSeqMetadata.setSessionConf(metadata, sparkSessionToRunBatches.conf) - offsetSeqMetadata = OffsetSeqMetadata( - metadata.batchWatermarkMs, metadata.batchTimestampMs, sparkSessionToRunBatches.conf) - } - - /* identify the current batch id: if commit log indicates we successfully processed the - * latest batch id in the offset log, then we can safely move to the next batch - * i.e., committedBatchId + 1 */ - batchCommitLog.getLatest() match { - case Some((latestCommittedBatchId, _)) => - if (latestBatchId == latestCommittedBatchId) { - /* The last batch was successfully committed, so we can safely process a - * new next batch but first: - * Make a call to getBatch using the offsets from previous batch. - * because certain sources (e.g., KafkaSource) assume on restart the last - * batch will be executed before getOffset is called again. */ - availableOffsets.foreach { ao: (Source, Offset) => - val (source, end) = ao - if (committedOffsets.get(source).map(_ != end).getOrElse(true)) { - val start = committedOffsets.get(source) - source.getBatch(start, end) - } - } - currentBatchId = latestCommittedBatchId + 1 - committedOffsets ++= availableOffsets - // Construct a new batch be recomputing availableOffsets - constructNextBatch() - } else if (latestCommittedBatchId < latestBatchId - 1) { - logWarning(s"Batch completion log latest batch id is " + - s"${latestCommittedBatchId}, which is not trailing " + - s"batchid $latestBatchId by one") - } - case None => logInfo("no commit log present") - } - logDebug(s"Resuming at batch $currentBatchId with committed offsets " + - s"$committedOffsets and available offsets $availableOffsets") - case None => // We are starting this stream for the first time. - logInfo(s"Starting new streaming query.") - currentBatchId = 0 - constructNextBatch() - } - } - - /** - * Returns true if there is any new data available to be processed. 
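The `dataAvailable` check removed here (relocated into the micro-batch execution path by this refactor rather than deleted) reduces to a per-source offset comparison. A generic sketch:

object DataAvailability {
  // A source has unprocessed data when its latest available offset differs
  // from the committed one, or when nothing has been committed for it yet.
  def dataAvailable[S, O](
      availableOffsets: Map[S, O],
      committedOffsets: Map[S, O]): Boolean =
    availableOffsets.exists { case (source, available) =>
      committedOffsets.get(source).forall(_ != available)
    }
}

// DataAvailability.dataAvailable(Map("s" -> 6), Map("s" -> 5)) == true   (new data)
// DataAvailability.dataAvailable(Map("s" -> 5), Map("s" -> 5)) == false  (caught up)
// DataAvailability.dataAvailable(Map("s" -> 6), Map.empty[String, Int]) == true  (never committed)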
- */ - private def dataAvailable: Boolean = { - availableOffsets.exists { - case (source, available) => - committedOffsets - .get(source) - .map(committed => committed != available) - .getOrElse(true) - } - } - - /** - * Queries all of the sources to see if any new data is available. When there is new data the - * batchId counter is incremented and a new log entry is written with the newest offsets. - */ - private def constructNextBatch(): Unit = { - // Check to see what new data is available. - val hasNewData = { - awaitBatchLock.lock() - try { - val latestOffsets: Map[Source, Option[Offset]] = uniqueSources.map { s => - updateStatusMessage(s"Getting offsets from $s") - reportTimeTaken("getOffset") { - (s, s.getOffset) - } - }.toMap - availableOffsets ++= latestOffsets.filter { case (s, o) => o.nonEmpty }.mapValues(_.get) - - if (dataAvailable) { - true - } else { - noNewData = true - false - } - } finally { - awaitBatchLock.unlock() - } - } - if (hasNewData) { - var batchWatermarkMs = offsetSeqMetadata.batchWatermarkMs - // Update the eventTime watermarks if we find any in the plan. - if (lastExecution != null) { - lastExecution.executedPlan.collect { - case e: EventTimeWatermarkExec => e - }.zipWithIndex.foreach { - case (e, index) if e.eventTimeStats.value.count > 0 => - logDebug(s"Observed event time stats $index: ${e.eventTimeStats.value}") - val newWatermarkMs = e.eventTimeStats.value.max - e.delayMs - val prevWatermarkMs = watermarkMsMap.get(index) - if (prevWatermarkMs.isEmpty || newWatermarkMs > prevWatermarkMs.get) { - watermarkMsMap.put(index, newWatermarkMs) - } - - // Populate 0 if we haven't seen any data yet for this watermark node. - case (_, index) => - if (!watermarkMsMap.isDefinedAt(index)) { - watermarkMsMap.put(index, 0) - } - } - - // Update the global watermark to the minimum of all watermark nodes. - // This is the safest option, because only the global watermark is fault-tolerant. Making - // it the minimum of all individual watermarks guarantees it will never advance past where - // any individual watermark operator would be if it were in a plan by itself. - if(!watermarkMsMap.isEmpty) { - val newWatermarkMs = watermarkMsMap.minBy(_._2)._2 - if (newWatermarkMs > batchWatermarkMs) { - logInfo(s"Updating eventTime watermark to: $newWatermarkMs ms") - batchWatermarkMs = newWatermarkMs - } else { - logDebug( - s"Event time didn't move: $newWatermarkMs < " + - s"$batchWatermarkMs") - } - } - } - offsetSeqMetadata = offsetSeqMetadata.copy( - batchWatermarkMs = batchWatermarkMs, - batchTimestampMs = triggerClock.getTimeMillis()) // Current batch timestamp in milliseconds - - updateStatusMessage("Writing offsets to log") - reportTimeTaken("walCommit") { - assert(offsetLog.add( - currentBatchId, - availableOffsets.toOffsetSeq(sources, offsetSeqMetadata)), - s"Concurrent update to the log. Multiple streaming jobs detected for $currentBatchId") - logInfo(s"Committed offsets for batch $currentBatchId. " + - s"Metadata ${offsetSeqMetadata.toString}") - - // NOTE: The following code is correct because runBatches() processes exactly one - // batch at a time. If we add pipeline parallelism (multiple batches in flight at - // the same time), this cleanup logic will need to change. - - // Now that we've updated the scheduler's persistent checkpoint, it is safe for the - // sources to discard data from the previous batch. 
- if (currentBatchId != 0) { - val prevBatchOff = offsetLog.get(currentBatchId - 1) - if (prevBatchOff.isDefined) { - prevBatchOff.get.toStreamProgress(sources).foreach { - case (src, off) => src.commit(off) - } - } else { - throw new IllegalStateException(s"batch $currentBatchId doesn't exist") - } - } - - // It is now safe to discard the metadata beyond the minimum number to retain. - // Note that purge is exclusive, i.e. it purges everything before the target ID. - if (minBatchesToRetain < currentBatchId) { - offsetLog.purge(currentBatchId - minBatchesToRetain) - batchCommitLog.purge(currentBatchId - minBatchesToRetain) - } - } - } else { - awaitBatchLock.lock() - try { - // Wake up any threads that are waiting for the stream to progress. - awaitBatchLockCondition.signalAll() - } finally { - awaitBatchLock.unlock() - } - } - } - - /** - * Processes any data available between `availableOffsets` and `committedOffsets`. - * @param sparkSessionToRunBatch Isolated [[SparkSession]] to run this batch with. - */ - private def runBatch(sparkSessionToRunBatch: SparkSession): Unit = { - // Request unprocessed data from all sources. - newData = reportTimeTaken("getBatch") { - availableOffsets.flatMap { - case (source, available) - if committedOffsets.get(source).map(_ != available).getOrElse(true) => - val current = committedOffsets.get(source) - val batch = source.getBatch(current, available) - assert(batch.isStreaming, - s"DataFrame returned by getBatch from $source did not have isStreaming=true\n" + - s"${batch.queryExecution.logical}") - logDebug(s"Retrieving data from $source: $current -> $available") - Some(source -> batch) - case _ => None - } - } - - // A list of attributes that will need to be updated. - val replacements = new ArrayBuffer[(Attribute, Attribute)] - // Replace sources in the logical plan with data that has arrived since the last batch. - val withNewSources = logicalPlan transform { - case StreamingExecutionRelation(source, output) => - newData.get(source).map { data => - val newPlan = data.logicalPlan - assert(output.size == newPlan.output.size, - s"Invalid batch: ${Utils.truncatedString(output, ",")} != " + - s"${Utils.truncatedString(newPlan.output, ",")}") - replacements ++= output.zip(newPlan.output) - newPlan - }.getOrElse { - LocalRelation(output, isStreaming = true) - } - } - - // Rewire the plan to use the new attributes that were returned by the source. - val replacementMap = AttributeMap(replacements) - val triggerLogicalPlan = withNewSources transformAllExpressions { - case a: Attribute if replacementMap.contains(a) => - replacementMap(a).withMetadata(a.metadata) - case ct: CurrentTimestamp => - CurrentBatchTimestamp(offsetSeqMetadata.batchTimestampMs, - ct.dataType) - case cd: CurrentDate => - CurrentBatchTimestamp(offsetSeqMetadata.batchTimestampMs, - cd.dataType, cd.timeZoneId) - } - - reportTimeTaken("queryPlanning") { - lastExecution = new IncrementalExecution( - sparkSessionToRunBatch, - triggerLogicalPlan, - outputMode, - checkpointFile("state"), - runId, - currentBatchId, - offsetSeqMetadata) - lastExecution.executedPlan // Force the lazy generation of execution plan - } - - val nextBatch = - new Dataset(sparkSessionToRunBatch, lastExecution, RowEncoder(lastExecution.analyzed.schema)) - - reportTimeTaken("addBatch") { - SQLExecution.withNewExecutionId(sparkSessionToRunBatch, lastExecution) { - sink.addBatch(currentBatchId, nextBatch) - } - } - - awaitBatchLock.lock() - try { - // Wake up any threads that are waiting for the stream to progress. 
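The retention arithmetic in the removed block is easy to get off by one: `purge` is exclusive, removing everything strictly before the target id. A toy log (illustrative names, not Spark's metadata log classes) showing which entries survive:

import scala.collection.mutable

class ToyMetadataLog {
  private val entries = mutable.SortedMap.empty[Long, String]
  def add(batchId: Long, value: String): Unit = entries(batchId) = value
  // Exclusive, like the real purge: removes everything *before* thresholdId.
  def purge(thresholdId: Long): Unit =
    entries.keys.takeWhile(_ < thresholdId).toList.foreach(entries.remove)
  def ids: Seq[Long] = entries.keys.toSeq
}

object PurgeDemo extends App {
  val log = new ToyMetadataLog
  (0L to 9L).foreach(i => log.add(i, s"batch-$i"))
  val (currentBatchId, minBatchesToRetain) = (9L, 3L)
  if (minBatchesToRetain < currentBatchId) log.purge(currentBatchId - minBatchesToRetain)
  println(log.ids) // Vector(6, 7, 8, 9): entries from current - retain onward survive
}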
- awaitBatchLockCondition.signalAll() - } finally { - awaitBatchLock.unlock() - } - } - override protected def postEvent(event: StreamingQueryListener.Event): Unit = { sparkSession.streams.postListenerEvent(event) } /** Stops all streaming sources safely. */ - private def stopSources(): Unit = { + protected def stopSources(): Unit = { uniqueSources.foreach { source => try { source.stop() @@ -761,10 +404,10 @@ class StreamExecution( // Set the state to TERMINATED so that the batching thread knows that it was interrupted // intentionally state.set(TERMINATED) - if (microBatchThread.isAlive) { + if (queryExecutionThread.isAlive) { sparkSession.sparkContext.cancelJobGroup(runId.toString) - microBatchThread.interrupt() - microBatchThread.join() + queryExecutionThread.interrupt() + queryExecutionThread.join() // microBatchThread may spawn new jobs, so we need to cancel again to prevent a leak sparkSession.sparkContext.cancelJobGroup(runId.toString) } @@ -775,7 +418,7 @@ class StreamExecution( * Blocks the current thread until processing for data from the given `source` has reached at * least the given `Offset`. This method is intended for use primarily when writing tests. */ - private[sql] def awaitOffset(source: Source, newOffset: Offset): Unit = { + private[sql] def awaitOffset(source: BaseStreamingSource, newOffset: Offset): Unit = { assertAwaitThread() def notDone = { val localCommittedOffsets = committedOffsets @@ -783,21 +426,21 @@ class StreamExecution( } while (notDone) { - awaitBatchLock.lock() + awaitProgressLock.lock() try { - awaitBatchLockCondition.await(100, TimeUnit.MILLISECONDS) + awaitProgressLockCondition.await(100, TimeUnit.MILLISECONDS) if (streamDeathCause != null) { throw streamDeathCause } } finally { - awaitBatchLock.unlock() + awaitProgressLock.unlock() } } logDebug(s"Unblocked at $newOffset for $source") } /** A flag to indicate that a batch has completed with no new data available. */ - @volatile private var noNewData = false + @volatile protected var noNewData = false /** * Assert that the await APIs should not be called in the stream thread. Otherwise, it may cause @@ -805,7 +448,7 @@ class StreamExecution( * the stream thread forever. */ private def assertAwaitThread(): Unit = { - if (microBatchThread eq Thread.currentThread) { + if (queryExecutionThread eq Thread.currentThread) { throw new IllegalStateException( "Cannot wait for a query state from the same thread that is running the query") } @@ -832,11 +475,11 @@ class StreamExecution( throw streamDeathCause } if (!isActive) return - awaitBatchLock.lock() + awaitProgressLock.lock() try { noNewData = false while (true) { - awaitBatchLockCondition.await(10000, TimeUnit.MILLISECONDS) + awaitProgressLockCondition.await(10000, TimeUnit.MILLISECONDS) if (streamDeathCause != null) { throw streamDeathCause } @@ -845,7 +488,7 @@ class StreamExecution( } } } finally { - awaitBatchLock.unlock() + awaitProgressLock.unlock() } } @@ -899,7 +542,7 @@ class StreamExecution( |Current Available Offsets: $availableOffsets | |Current State: $state - |Thread State: ${microBatchThread.getState}""".stripMargin + |Thread State: ${queryExecutionThread.getState}""".stripMargin if (includeLogicalPlan) { debugString + s"\n\nLogical Plan:\n$logicalPlan" } else { @@ -907,7 +550,7 @@ class StreamExecution( } } - private def getBatchDescriptionString: String = { + protected def getBatchDescriptionString: String = { val batchDescription = if (currentBatchId < 0) "init" else currentBatchId.toString Option(name).map(_ + "
        ").getOrElse("") + s"id = $id
        runId = $runId
        batch = $batchDescription" @@ -919,7 +562,7 @@ object StreamExecution { } /** - * A special thread to run the stream query. Some codes require to run in the StreamExecutionThread - * and will use `classOf[StreamExecutionThread]` to check. + * A special thread to run the stream query. Some codes require to run in the QueryExecutionThread + * and will use `classOf[QueryxecutionThread]` to check. */ -abstract class StreamExecutionThread(name: String) extends UninterruptibleThread(name) +abstract class QueryExecutionThread(name: String) extends UninterruptibleThread(name) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamProgress.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamProgress.scala index a3f3662e6f4c9..8531070b1bc49 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamProgress.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamProgress.scala @@ -23,25 +23,28 @@ import scala.collection.{immutable, GenTraversableOnce} * A helper class that looks like a Map[Source, Offset]. */ class StreamProgress( - val baseMap: immutable.Map[Source, Offset] = new immutable.HashMap[Source, Offset]) - extends scala.collection.immutable.Map[Source, Offset] { + val baseMap: immutable.Map[BaseStreamingSource, Offset] = + new immutable.HashMap[BaseStreamingSource, Offset]) + extends scala.collection.immutable.Map[BaseStreamingSource, Offset] { - def toOffsetSeq(source: Seq[Source], metadata: OffsetSeqMetadata): OffsetSeq = { + def toOffsetSeq(source: Seq[BaseStreamingSource], metadata: OffsetSeqMetadata): OffsetSeq = { OffsetSeq(source.map(get), Some(metadata)) } override def toString: String = baseMap.map { case (k, v) => s"$k: $v"}.mkString("{", ",", "}") - override def +[B1 >: Offset](kv: (Source, B1)): Map[Source, B1] = baseMap + kv + override def +[B1 >: Offset](kv: (BaseStreamingSource, B1)): Map[BaseStreamingSource, B1] = { + baseMap + kv + } - override def get(key: Source): Option[Offset] = baseMap.get(key) + override def get(key: BaseStreamingSource): Option[Offset] = baseMap.get(key) - override def iterator: Iterator[(Source, Offset)] = baseMap.iterator + override def iterator: Iterator[(BaseStreamingSource, Offset)] = baseMap.iterator - override def -(key: Source): Map[Source, Offset] = baseMap - key + override def -(key: BaseStreamingSource): Map[BaseStreamingSource, Offset] = baseMap - key - def ++(updates: GenTraversableOnce[(Source, Offset)]): StreamProgress = { + def ++(updates: GenTraversableOnce[(BaseStreamingSource, Offset)]): StreamProgress = { new StreamProgress(baseMap ++ updates) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingQueryListenerBus.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingQueryListenerBus.scala index 07e39023c8366..7dd491ede9d05 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingQueryListenerBus.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingQueryListenerBus.scala @@ -40,7 +40,7 @@ class StreamingQueryListenerBus(sparkListenerBus: LiveListenerBus) import StreamingQueryListener._ - sparkListenerBus.addToSharedQueue(this) + sparkListenerBus.addToQueue(this, StreamingQueryListenerBus.STREAM_EVENT_QUERY) /** * RunIds of active queries whose events are supposed to be forwarded by this ListenerBus @@ -130,3 +130,7 @@ class StreamingQueryListenerBus(sparkListenerBus: LiveListenerBus) } } } 
+ +object StreamingQueryListenerBus { + val STREAM_EVENT_QUERY = "streams" +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala index 6b82c78ea653d..a0ee683a895d8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala @@ -25,6 +25,8 @@ import org.apache.spark.sql.catalyst.plans.logical.LeafNode import org.apache.spark.sql.catalyst.plans.logical.Statistics import org.apache.spark.sql.execution.LeafExecNode import org.apache.spark.sql.execution.datasources.DataSource +import org.apache.spark.sql.sources.v2.DataSourceV2 +import org.apache.spark.sql.sources.v2.streaming.ContinuousReadSupport object StreamingRelation { def apply(dataSource: DataSource): StreamingRelation = { @@ -59,7 +61,53 @@ case class StreamingRelation(dataSource: DataSource, sourceName: String, output: * [[org.apache.spark.sql.catalyst.plans.logical.LogicalPlan]]. */ case class StreamingExecutionRelation( - source: Source, + source: BaseStreamingSource, + output: Seq[Attribute])(session: SparkSession) + extends LeafNode { + + override def isStreaming: Boolean = true + override def toString: String = source.toString + + // There's no sensible value here. On the execution path, this relation will be + // swapped out with microbatches. But some dataframe operations (in particular explain) do lead + // to this node surviving analysis. So we satisfy the LeafNode contract with the session default + // value. + override def computeStats(): Statistics = Statistics( + sizeInBytes = BigInt(session.sessionState.conf.defaultSizeInBytes) + ) +} + +// We have to pack in the V1 data source as a shim, for the case when a source implements +// continuous processing (which is always V2) but only has V1 microbatch support. We don't +// know at read time whether the query is continuous or not, so we need to be able to +// swap a V1 relation back in. +/** + * Used to link a [[DataSourceV2]] into a streaming + * [[org.apache.spark.sql.catalyst.plans.logical.LogicalPlan]]. This is only used for creating + * a streaming [[org.apache.spark.sql.DataFrame]] from [[org.apache.spark.sql.DataFrameReader]], + * and should be converted before passing to [[StreamExecution]]. + */ +case class StreamingRelationV2( + dataSource: DataSourceV2, + sourceName: String, + extraOptions: Map[String, String], + output: Seq[Attribute], + v1Relation: Option[StreamingRelation])(session: SparkSession) + extends LeafNode { + override def isStreaming: Boolean = true + override def toString: String = sourceName + + override def computeStats(): Statistics = Statistics( + sizeInBytes = BigInt(session.sessionState.conf.defaultSizeInBytes) + ) +} + +/** + * Used to link a [[DataSourceV2]] into a continuous processing execution.
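The shim comment above is the central design point: continuous processing always requires V2, but a source may only offer V1 micro-batch support. A toy model of the swap-back decision (all types are stand-ins, not Spark's relations):

sealed trait ToyRelation
case class ToyV1Relation(sourceName: String) extends ToyRelation
case class ToyV2Relation(
    sourceName: String,
    supportsContinuous: Boolean,
    v1Fallback: Option[ToyV1Relation]) extends ToyRelation

object RelationPlanner {
  // Continuous runs must stay on V2; micro-batch runs swap the V1 shim back
  // in when the source only has V1 micro-batch support.
  def planFor(rel: ToyV2Relation, continuous: Boolean): ToyRelation =
    if (continuous) {
      require(rel.supportsContinuous,
        s"Data source ${rel.sourceName} does not support continuous processing.")
      rel
    } else {
      rel.v1Fallback.getOrElse(rel)
    }
}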
+ */ +case class ContinuousExecutionRelation( + source: ContinuousReadSupport, + extraOptions: Map[String, String], output: Seq[Attribute])(session: SparkSession) extends LeafNode { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinHelper.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinHelper.scala index 167e991ca62f8..4aba76cad367e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinHelper.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinHelper.scala @@ -72,8 +72,7 @@ object StreamingSymmetricHashJoinHelper extends Logging { * left AND right AND joined is equivalent to full. * * Note that left and right do not necessarily contain *all* conjuncts which satisfy - * their condition. Any conjuncts after the first nondeterministic one are treated as - * nondeterministic for purposes of the split. + * their condition. * * @param leftSideOnly Deterministic conjuncts which reference only the left side of the join. * @param rightSideOnly Deterministic conjuncts which reference only the right side of the join. @@ -111,7 +110,7 @@ object StreamingSymmetricHashJoinHelper extends Logging { // Span rather than partition, because nondeterministic expressions don't commute // across AND. val (deterministicConjuncts, nonDeterministicConjuncts) = - splitConjunctivePredicates(condition.get).span(_.deterministic) + splitConjunctivePredicates(condition.get).partition(_.deterministic) val (leftConjuncts, nonLeftConjuncts) = deterministicConjuncts.partition { cond => cond.references.subsetOf(left.outputSet) @@ -204,7 +203,7 @@ object StreamingSymmetricHashJoinHelper extends Logging { /** * A custom RDD that allows partitions to be "zipped" together, while ensuring the tasks' * preferred location is based on which executors have the required join state stores already - * loaded. This is class is a modified verion of [[ZippedPartitionsRDD2]]. + * loaded. This class is a modified version of [[ZippedPartitionsRDD2]]. */ class StateStoreAwareZipPartitionsRDD[A: ClassTag, B: ClassTag, V: ClassTag]( sc: SparkContext, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Triggers.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Triggers.scala index 271bc4da99c08..19e3e55cb2829 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Triggers.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Triggers.scala @@ -21,7 +21,7 @@ import org.apache.spark.annotation.{Experimental, InterfaceStability} import org.apache.spark.sql.streaming.Trigger /** - * A [[Trigger]] that process only one batch of data in a streaming query then terminates + * A [[Trigger]] that processes only one batch of data in a streaming query then terminates + * the query. */ @Experimental diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala new file mode 100644 index 0000000000000..d79e4bd65f563 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDDIter.scala @@ -0,0 +1,218 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming.continuous + +import java.util.concurrent.{ArrayBlockingQueue, BlockingQueue, TimeUnit} +import java.util.concurrent.atomic.{AtomicBoolean, AtomicLong} + +import scala.collection.JavaConverters._ + +import org.apache.spark._ +import org.apache.spark.internal.Logging +import org.apache.spark.rdd.RDD +import org.apache.spark.rpc.RpcEndpointRef +import org.apache.spark.sql.{Row, SQLContext} +import org.apache.spark.sql.catalyst.expressions.UnsafeRow +import org.apache.spark.sql.execution.datasources.v2.{DataSourceRDDPartition, RowToUnsafeDataReader} +import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.execution.streaming.continuous._ +import org.apache.spark.sql.sources.v2.reader._ +import org.apache.spark.sql.sources.v2.streaming.reader.{ContinuousDataReader, PartitionOffset} +import org.apache.spark.sql.streaming.ProcessingTime +import org.apache.spark.util.{SystemClock, ThreadUtils} + +class ContinuousDataSourceRDD( + sc: SparkContext, + sqlContext: SQLContext, + @transient private val readTasks: java.util.List[ReadTask[UnsafeRow]]) + extends RDD[UnsafeRow](sc, Nil) { + + private val dataQueueSize = sqlContext.conf.continuousStreamingExecutorQueueSize + private val epochPollIntervalMs = sqlContext.conf.continuousStreamingExecutorPollIntervalMs + + override protected def getPartitions: Array[Partition] = { + readTasks.asScala.zipWithIndex.map { + case (readTask, index) => new DataSourceRDDPartition(index, readTask) + }.toArray + } + + override def compute(split: Partition, context: TaskContext): Iterator[UnsafeRow] = { + val reader = split.asInstanceOf[DataSourceRDDPartition].readTask.createDataReader() + + val runId = context.getLocalProperty(ContinuousExecution.RUN_ID_KEY) + + // This queue contains two types of messages: + // * (null, null) representing an epoch boundary. + // * (row, off) containing a data row and its corresponding PartitionOffset. 
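The two-message queue protocol just described can be modeled without Spark. A standalone sketch using strings and boxed longs in place of `UnsafeRow` and `PartitionOffset`:

import java.util.concurrent.ArrayBlockingQueue

object EpochQueueDemo extends App {
  type Entry = (String, java.lang.Long) // stand-ins for (UnsafeRow, PartitionOffset)
  val queue = new ArrayBlockingQueue[Entry](16)

  queue.put(("row-1", 1L))
  queue.put((null, null)) // epoch boundary marker
  queue.put(("row-2", 2L))

  // Drain: a (null, null) entry ends the current epoch; anything else is data.
  Iterator.continually(queue.poll()).takeWhile(_ != null).foreach {
    case (null, null) => println("epoch boundary: report offsets, start next epoch")
    case (row, off)   => println(s"data $row at offset $off")
  }
}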
+ val queue = new ArrayBlockingQueue[(UnsafeRow, PartitionOffset)](dataQueueSize) + + val epochPollFailed = new AtomicBoolean(false) + val epochPollExecutor = ThreadUtils.newDaemonSingleThreadScheduledExecutor( + s"epoch-poll--${runId}--${context.partitionId()}") + val epochPollRunnable = new EpochPollRunnable(queue, context, epochPollFailed) + epochPollExecutor.scheduleWithFixedDelay( + epochPollRunnable, 0, epochPollIntervalMs, TimeUnit.MILLISECONDS) + + // Important sequencing - we must get start offset before the data reader thread begins + val startOffset = ContinuousDataSourceRDD.getBaseReader(reader).getOffset + + val dataReaderFailed = new AtomicBoolean(false) + val dataReaderThread = new DataReaderThread(reader, queue, context, dataReaderFailed) + dataReaderThread.setDaemon(true) + dataReaderThread.start() + + context.addTaskCompletionListener(_ => { + reader.close() + dataReaderThread.interrupt() + epochPollExecutor.shutdown() + }) + + val epochEndpoint = EpochCoordinatorRef.get(runId, SparkEnv.get) + new Iterator[UnsafeRow] { + private val POLL_TIMEOUT_MS = 1000 + + private var currentEntry: (UnsafeRow, PartitionOffset) = _ + private var currentOffset: PartitionOffset = startOffset + private var currentEpoch = + context.getLocalProperty(ContinuousExecution.START_EPOCH_KEY).toLong + + override def hasNext(): Boolean = { + while (currentEntry == null) { + if (context.isInterrupted() || context.isCompleted()) { + currentEntry = (null, null) + } + if (dataReaderFailed.get()) { + throw new SparkException("data read failed", dataReaderThread.failureReason) + } + if (epochPollFailed.get()) { + throw new SparkException("epoch poll failed", epochPollRunnable.failureReason) + } + currentEntry = queue.poll(POLL_TIMEOUT_MS, TimeUnit.MILLISECONDS) + } + + currentEntry match { + // epoch boundary marker + case (null, null) => + epochEndpoint.send(ReportPartitionOffset( + context.partitionId(), + currentEpoch, + currentOffset)) + currentEpoch += 1 + currentEntry = null + false + // real row + case (_, offset) => + currentOffset = offset + true + } + } + + override def next(): UnsafeRow = { + if (currentEntry == null) throw new NoSuchElementException("No current row was set") + val r = currentEntry._1 + currentEntry = null + r + } + } + } + + override def getPreferredLocations(split: Partition): Seq[String] = { + split.asInstanceOf[DataSourceRDDPartition].readTask.preferredLocations() + } +} + +case class EpochPackedPartitionOffset(epoch: Long) extends PartitionOffset + +class EpochPollRunnable( + queue: BlockingQueue[(UnsafeRow, PartitionOffset)], + context: TaskContext, + failedFlag: AtomicBoolean) + extends Thread with Logging { + private[continuous] var failureReason: Throwable = _ + + private val epochEndpoint = EpochCoordinatorRef.get( + context.getLocalProperty(ContinuousExecution.RUN_ID_KEY), SparkEnv.get) + private var currentEpoch = context.getLocalProperty(ContinuousExecution.START_EPOCH_KEY).toLong + + override def run(): Unit = { + try { + val newEpoch = epochEndpoint.askSync[Long](GetCurrentEpoch) + for (i <- currentEpoch to newEpoch - 1) { + queue.put((null, null)) + logDebug(s"Sent marker to start epoch ${i + 1}") + } + currentEpoch = newEpoch + } catch { + case t: Throwable => + failureReason = t + failedFlag.set(true) + throw t + } + } +} + +class DataReaderThread( + reader: DataReader[UnsafeRow], + queue: BlockingQueue[(UnsafeRow, PartitionOffset)], + context: TaskContext, + failedFlag: AtomicBoolean) + extends Thread( + s"continuous-reader--${context.partitionId()}--" + + 
s"${context.getLocalProperty(ContinuousExecution.RUN_ID_KEY)}") { + private[continuous] var failureReason: Throwable = _ + + override def run(): Unit = { + val baseReader = ContinuousDataSourceRDD.getBaseReader(reader) + try { + while (!context.isInterrupted && !context.isCompleted()) { + if (!reader.next()) { + // Check again, since reader.next() might have blocked through an incoming interrupt. + if (!context.isInterrupted && !context.isCompleted()) { + throw new IllegalStateException( + "Continuous reader reported no elements! Reader should have blocked waiting.") + } else { + return + } + } + + queue.put((reader.get().copy(), baseReader.getOffset)) + } + } catch { + case _: InterruptedException if context.isInterrupted() => + // Continuous shutdown always involves an interrupt; do nothing and shut down quietly. + + case t: Throwable => + failureReason = t + failedFlag.set(true) + // Don't rethrow the exception in this thread. It's not needed, and the default Spark + // exception handler will kill the executor. + } + } +} + +object ContinuousDataSourceRDD { + private[continuous] def getBaseReader(reader: DataReader[UnsafeRow]): ContinuousDataReader[_] = { + reader match { + case r: ContinuousDataReader[UnsafeRow] => r + case wrapped: RowToUnsafeDataReader => + wrapped.rowReader.asInstanceOf[ContinuousDataReader[Row]] + case _ => + throw new IllegalStateException(s"Unknown continuous reader type ${reader.getClass}") + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala new file mode 100644 index 0000000000000..9657b5e26d770 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -0,0 +1,350 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.streaming.continuous + +import java.util.concurrent.TimeUnit + +import scala.collection.JavaConverters._ +import scala.collection.mutable.{ArrayBuffer, Map => MutableMap} + +import org.apache.spark.SparkEnv +import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, CurrentBatchTimestamp, CurrentDate, CurrentTimestamp} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.execution.SQLExecution +import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, StreamingDataSourceV2Relation, WriteToDataSourceV2} +import org.apache.spark.sql.execution.streaming.{ContinuousExecutionRelation, StreamingRelationV2, _} +import org.apache.spark.sql.sources.v2.DataSourceV2Options +import org.apache.spark.sql.sources.v2.streaming.{ContinuousReadSupport, ContinuousWriteSupport} +import org.apache.spark.sql.sources.v2.streaming.reader.{ContinuousReader, Offset, PartitionOffset} +import org.apache.spark.sql.sources.v2.streaming.writer.ContinuousWriter +import org.apache.spark.sql.streaming.{OutputMode, ProcessingTime, Trigger} +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.{Clock, Utils} + +class ContinuousExecution( + sparkSession: SparkSession, + name: String, + checkpointRoot: String, + analyzedPlan: LogicalPlan, + sink: ContinuousWriteSupport, + trigger: Trigger, + triggerClock: Clock, + outputMode: OutputMode, + extraOptions: Map[String, String], + deleteCheckpointOnStop: Boolean) + extends StreamExecution( + sparkSession, name, checkpointRoot, analyzedPlan, sink, + trigger, triggerClock, outputMode, deleteCheckpointOnStop) { + + @volatile protected var continuousSources: Seq[ContinuousReader] = Seq.empty + override protected def sources: Seq[BaseStreamingSource] = continuousSources + + override lazy val logicalPlan: LogicalPlan = { + assert(queryExecutionThread eq Thread.currentThread, + "logicalPlan must be initialized in StreamExecutionThread " + + s"but the current thread was ${Thread.currentThread}") + val toExecutionRelationMap = MutableMap[StreamingRelationV2, ContinuousExecutionRelation]() + analyzedPlan.transform { + case r @ StreamingRelationV2( + source: ContinuousReadSupport, _, extraReaderOptions, output, _) => + toExecutionRelationMap.getOrElseUpdate(r, { + ContinuousExecutionRelation(source, extraReaderOptions, output)(sparkSession) + }) + case StreamingRelationV2(_, sourceName, _, _, _) => + throw new AnalysisException( + s"Data source $sourceName does not support continuous processing.") + } + } + + private val triggerExecutor = trigger match { + case ContinuousTrigger(t) => ProcessingTimeExecutor(ProcessingTime(t), triggerClock) + case _ => throw new IllegalStateException(s"Unsupported type of trigger: $trigger") + } + + override protected def runActivatedStream(sparkSessionForStream: SparkSession): Unit = { + do { + try { + runContinuous(sparkSessionForStream) + } catch { + case _: InterruptedException if state.get().equals(RECONFIGURING) => + // swallow exception and run again + state.set(ACTIVE) + } + } while (state.get() == ACTIVE) + } + + /** + * Populate the start offsets to start the execution at the current offsets stored in the sink + * (i.e. avoid reprocessing data that we have already processed). 
This function must be called + * before any processing occurs and will populate the following fields: + * - currentBatchId + * - committedOffsets + * The basic structure of this method is as follows: + * + * Identify (from the commit log) the latest epoch that has committed + * IF last epoch exists THEN + * Get end offsets for the epoch + * Set those offsets as the current commit progress + * Set the next epoch ID as the last + 1 + * Return the end offsets of the last epoch as start for the next one + * DONE + * ELSE + * Start a new query log + * DONE + */ + private def getStartOffsets(sparkSessionToRunBatches: SparkSession): OffsetSeq = { + // Note that this will need a slight modification for exactly once. If ending offsets were + // reported but not committed for any epochs, we must replay exactly to those offsets. + // For at least once, we can just ignore those reports and risk duplicates. + commitLog.getLatest() match { + case Some((latestEpochId, _)) => + val nextOffsets = offsetLog.get(latestEpochId).getOrElse { + throw new IllegalStateException( + s"Batch $latestEpochId was committed without end epoch offsets!") + } + committedOffsets = nextOffsets.toStreamProgress(sources) + + // Forcibly align commit and offset logs by slicing off any spurious offset logs from + // a previous run. We can't allow commits to an epoch that a previous run reached but + // this run has not. + offsetLog.purgeAfter(latestEpochId) + + currentBatchId = latestEpochId + 1 + logDebug(s"Resuming at epoch $currentBatchId with committed offsets $committedOffsets") + nextOffsets + case None => + // We are starting this stream for the first time. Offsets are all None. + logInfo(s"Starting new streaming query.") + currentBatchId = 0 + OffsetSeq.fill(continuousSources.map(_ => null): _*) + } + } + + /** + * Do a continuous run. + * @param sparkSessionForQuery Isolated [[SparkSession]] to run the continuous query with. + */ + private def runContinuous(sparkSessionForQuery: SparkSession): Unit = { + // A list of attributes that will need to be updated. + val replacements = new ArrayBuffer[(Attribute, Attribute)] + // Translate from continuous relation to the underlying data source. + var nextSourceId = 0 + continuousSources = logicalPlan.collect { + case ContinuousExecutionRelation(dataSource, extraReaderOptions, output) => + val metadataPath = s"$resolvedCheckpointRoot/sources/$nextSourceId" + nextSourceId += 1 + + dataSource.createContinuousReader( + java.util.Optional.empty[StructType](), + metadataPath, + new DataSourceV2Options(extraReaderOptions.asJava)) + } + uniqueSources = continuousSources.distinct + + val offsets = getStartOffsets(sparkSessionForQuery) + + var insertedSourceId = 0 + val withNewSources = logicalPlan transform { + case ContinuousExecutionRelation(_, _, output) => + val reader = continuousSources(insertedSourceId) + insertedSourceId += 1 + val newOutput = reader.readSchema().toAttributes + + assert(output.size == newOutput.size, + s"Invalid reader: ${Utils.truncatedString(output, ",")} != " + + s"${Utils.truncatedString(newOutput, ",")}") + replacements ++= output.zip(newOutput) + + val loggedOffset = offsets.offsets(0) + val realOffset = loggedOffset.map(off => reader.deserializeOffset(off.json)) + reader.setOffset(java.util.Optional.ofNullable(realOffset.orNull)) + new StreamingDataSourceV2Relation(newOutput, reader) + } + + // Rewire the plan to use the new attributes that were returned by the source. 
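The recovery rule sketched in getStartOffsets boils down to: resume one past the last committed epoch, or start fresh at epoch 0. A simplified stand-in (opaque string offsets, in-memory logs, not Spark's log classes):

object StartOffsets {
  // Returns (next epoch to run, end offsets of the last committed epoch).
  def resolveStart(
      commitLog: Set[Long],
      offsetLog: Map[Long, String]): (Long, Option[String]) =
    commitLog.reduceOption(_ max _) match {
      case Some(latestEpoch) =>
        val endOffsets = offsetLog.getOrElse(latestEpoch,
          sys.error(s"Batch $latestEpoch was committed without end epoch offsets!"))
        (latestEpoch + 1, Some(endOffsets)) // resume just past the last commit
      case None =>
        (0L, None) // first run: start at epoch 0 with no offsets
    }
}

// StartOffsets.resolveStart(Set(0L, 1L), Map(0L -> "o0", 1L -> "o1", 2L -> "o2"))
//   == (2L, Some("o1"))  -- the uncommitted epoch-2 entry is ignored (purged in the real code)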
+    val replacementMap = AttributeMap(replacements)
+    val triggerLogicalPlan = withNewSources transformAllExpressions {
+      case a: Attribute if replacementMap.contains(a) =>
+        replacementMap(a).withMetadata(a.metadata)
+      case (_: CurrentTimestamp | _: CurrentDate) =>
+        throw new IllegalStateException(
+          "CurrentTimestamp and CurrentDate not yet supported for continuous processing")
+    }
+
+    val writer = sink.createContinuousWriter(
+      s"$runId",
+      triggerLogicalPlan.schema,
+      outputMode,
+      new DataSourceV2Options(extraOptions.asJava))
+    val withSink = WriteToDataSourceV2(writer.get(), triggerLogicalPlan)
+
+    val reader = withSink.collect {
+      case DataSourceV2Relation(_, r: ContinuousReader) => r
+    }.head
+
+    reportTimeTaken("queryPlanning") {
+      lastExecution = new IncrementalExecution(
+        sparkSessionForQuery,
+        withSink,
+        outputMode,
+        checkpointFile("state"),
+        runId,
+        currentBatchId,
+        offsetSeqMetadata)
+      lastExecution.executedPlan // Force the lazy generation of execution plan
+    }
+
+    sparkSession.sparkContext.setLocalProperty(
+      ContinuousExecution.START_EPOCH_KEY, currentBatchId.toString)
+    sparkSession.sparkContext.setLocalProperty(
+      ContinuousExecution.RUN_ID_KEY, runId.toString)
+
+    // Use the parent Spark session for the endpoint since it's where this query ID is registered.
+    val epochEndpoint =
+      EpochCoordinatorRef.create(
+        writer.get(), reader, this, currentBatchId, sparkSession, SparkEnv.get)
+    val epochUpdateThread = new Thread(new Runnable {
+      override def run(): Unit = {
+        try {
+          triggerExecutor.execute(() => {
+            startTrigger()
+
+            if (reader.needsReconfiguration()) {
+              state.set(RECONFIGURING)
+              stopSources()
+              if (queryExecutionThread.isAlive) {
+                sparkSession.sparkContext.cancelJobGroup(runId.toString)
+                queryExecutionThread.interrupt()
+                // No need to join - this thread is about to end anyway.
+              }
+              false
+            } else if (isActive) {
+              currentBatchId = epochEndpoint.askSync[Long](IncrementAndGetEpoch)
+              logInfo(s"New epoch $currentBatchId is starting.")
+              true
+            } else {
+              false
+            }
+          })
+        } catch {
+          case _: InterruptedException =>
+            // Cleanly stop the query.
+            return
+        }
+      }
+    }, s"epoch update thread for $prettyIdString")
+
+    try {
+      epochUpdateThread.setDaemon(true)
+      epochUpdateThread.start()
+
+      reportTimeTaken("runContinuous") {
+        SQLExecution.withNewExecutionId(
+          sparkSessionForQuery, lastExecution)(lastExecution.toRdd)
+      }
+    } finally {
+      SparkEnv.get.rpcEnv.stop(epochEndpoint)
+
+      epochUpdateThread.interrupt()
+      epochUpdateThread.join()
+    }
+  }
+
+  /**
+   * Report ending partition offsets for the given reader at the given epoch.
+   */
+  def addOffset(
+      epoch: Long, reader: ContinuousReader, partitionOffsets: Seq[PartitionOffset]): Unit = {
+    assert(continuousSources.length == 1, "only one continuous source supported currently")
+
+    if (partitionOffsets.contains(null)) {
+      // If any offset is null, that means the corresponding partition hasn't seen any data yet, so
+      // there's nothing meaningful to add to the offset log. Skip this report entirely.
+      return
+    }
+    val globalOffset = reader.mergeOffsets(partitionOffsets.toArray)
+    synchronized {
+      if (queryExecutionThread.isAlive) {
+        offsetLog.add(epoch, OffsetSeq.fill(globalOffset))
+      } else {
+        return
+      }
+    }
+  }
+
+  /**
+   * Mark the specified epoch as committed. All readers must have reported end offsets for the epoch
+   * before this is called.
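+   *
+   * Note that only a single continuous source is currently supported, so the epoch's entry in
+   * the offset log holds exactly one offset.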
+ */ + def commit(epoch: Long): Unit = { + assert(continuousSources.length == 1, "only one continuous source supported currently") + assert(offsetLog.get(epoch).isDefined, s"offset for epoch $epoch not reported before commit") + synchronized { + if (queryExecutionThread.isAlive) { + commitLog.add(epoch) + val offset = offsetLog.get(epoch).get.offsets(0).get + committedOffsets ++= Seq(continuousSources(0) -> offset) + } else { + return + } + } + + if (minLogEntriesToMaintain < currentBatchId) { + offsetLog.purge(currentBatchId - minLogEntriesToMaintain) + commitLog.purge(currentBatchId - minLogEntriesToMaintain) + } + + awaitProgressLock.lock() + try { + awaitProgressLockCondition.signalAll() + } finally { + awaitProgressLock.unlock() + } + } + + /** + * Blocks the current thread until execution has committed at or after the specified epoch. + */ + private[sql] def awaitEpoch(epoch: Long): Unit = { + def notDone = { + val latestCommit = commitLog.getLatest() + latestCommit match { + case Some((latestEpoch, _)) => + latestEpoch < epoch + case None => true + } + } + + while (notDone) { + awaitProgressLock.lock() + try { + awaitProgressLockCondition.await(100, TimeUnit.MILLISECONDS) + if (streamDeathCause != null) { + throw streamDeathCause + } + } finally { + awaitProgressLock.unlock() + } + } + } +} + +object ContinuousExecution { + val START_EPOCH_KEY = "__continuous_start_epoch" + val RUN_ID_KEY = "__run_id" +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala new file mode 100644 index 0000000000000..b4b21e7d2052f --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.streaming.continuous + +import scala.collection.JavaConverters._ + +import org.json4s.DefaultFormats +import org.json4s.jackson.Serialization + +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.expressions.UnsafeRow +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.execution.streaming.{RateSourceProvider, RateStreamOffset, ValueRunTimeMsPair} +import org.apache.spark.sql.execution.streaming.sources.RateStreamSourceV2 +import org.apache.spark.sql.sources.v2.{DataSourceV2, DataSourceV2Options} +import org.apache.spark.sql.sources.v2.reader._ +import org.apache.spark.sql.sources.v2.streaming.reader.{ContinuousDataReader, ContinuousReader, Offset, PartitionOffset} +import org.apache.spark.sql.types.{LongType, StructField, StructType, TimestampType} + +case class RateStreamPartitionOffset( + partition: Int, currentValue: Long, currentTimeMs: Long) extends PartitionOffset + +class RateStreamContinuousReader(options: DataSourceV2Options) + extends ContinuousReader { + implicit val defaultFormats: DefaultFormats = DefaultFormats + + val creationTime = System.currentTimeMillis() + + val numPartitions = options.get(RateStreamSourceV2.NUM_PARTITIONS).orElse("5").toInt + val rowsPerSecond = options.get(RateStreamSourceV2.ROWS_PER_SECOND).orElse("6").toLong + val perPartitionRate = rowsPerSecond.toDouble / numPartitions.toDouble + + override def mergeOffsets(offsets: Array[PartitionOffset]): Offset = { + assert(offsets.length == numPartitions) + val tuples = offsets.map { + case RateStreamPartitionOffset(i, currVal, nextRead) => + (i, ValueRunTimeMsPair(currVal, nextRead)) + } + RateStreamOffset(Map(tuples: _*)) + } + + override def deserializeOffset(json: String): Offset = { + RateStreamOffset(Serialization.read[Map[Int, ValueRunTimeMsPair]](json)) + } + + override def readSchema(): StructType = RateSourceProvider.SCHEMA + + private var offset: Offset = _ + + override def setOffset(offset: java.util.Optional[Offset]): Unit = { + this.offset = offset.orElse(RateStreamSourceV2.createInitialOffset(numPartitions, creationTime)) + } + + override def getStartOffset(): Offset = offset + + override def createReadTasks(): java.util.List[ReadTask[Row]] = { + val partitionStartMap = offset match { + case off: RateStreamOffset => off.partitionToValueAndRunTimeMs + case off => + throw new IllegalArgumentException( + s"invalid offset type ${off.getClass()} for ContinuousRateSource") + } + if (partitionStartMap.keySet.size != numPartitions) { + throw new IllegalArgumentException( + s"The previous run contained ${partitionStartMap.keySet.size} partitions, but" + + s" $numPartitions partitions are currently configured. The numPartitions option" + + " cannot be changed.") + } + + Range(0, numPartitions).map { i => + val start = partitionStartMap(i) + // Have each partition advance by numPartitions each row, with starting points staggered + // by their partition index. 
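+      // For example, with numPartitions = 3, partition 0 produces values 0, 3, 6, ...,
+      // partition 1 produces 1, 4, 7, ..., and partition 2 produces 2, 5, 8, ...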
+ RateStreamContinuousReadTask( + start.value, + start.runTimeMs, + i, + numPartitions, + perPartitionRate) + .asInstanceOf[ReadTask[Row]] + }.asJava + } + + override def commit(end: Offset): Unit = {} + override def stop(): Unit = {} + +} + +case class RateStreamContinuousReadTask( + startValue: Long, + startTimeMs: Long, + partitionIndex: Int, + increment: Long, + rowsPerSecond: Double) + extends ReadTask[Row] { + override def createDataReader(): DataReader[Row] = + new RateStreamContinuousDataReader( + startValue, startTimeMs, partitionIndex, increment, rowsPerSecond) +} + +class RateStreamContinuousDataReader( + startValue: Long, + startTimeMs: Long, + partitionIndex: Int, + increment: Long, + rowsPerSecond: Double) + extends ContinuousDataReader[Row] { + private var nextReadTime: Long = startTimeMs + private val readTimeIncrement: Long = (1000 / rowsPerSecond).toLong + + private var currentValue = startValue + private var currentRow: Row = null + + override def next(): Boolean = { + currentValue += increment + nextReadTime += readTimeIncrement + + try { + while (System.currentTimeMillis < nextReadTime) { + Thread.sleep(nextReadTime - System.currentTimeMillis) + } + } catch { + case _: InterruptedException => + // Someone's trying to end the task; just let them. + return false + } + + currentRow = Row( + DateTimeUtils.toJavaTimestamp(DateTimeUtils.fromMillis(nextReadTime)), + currentValue) + + true + } + + override def get: Row = currentRow + + override def close(): Unit = {} + + override def getOffset(): PartitionOffset = + RateStreamPartitionOffset(partitionIndex, currentValue, nextReadTime) +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousTrigger.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousTrigger.scala new file mode 100644 index 0000000000000..90e1766c4d9f1 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousTrigger.scala @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming.continuous + +import java.util.concurrent.TimeUnit + +import scala.concurrent.duration.Duration + +import org.apache.commons.lang3.StringUtils + +import org.apache.spark.annotation.{Experimental, InterfaceStability} +import org.apache.spark.sql.streaming.{ProcessingTime, Trigger} +import org.apache.spark.unsafe.types.CalendarInterval + +/** + * A [[Trigger]] that continuously processes streaming data, asynchronously checkpointing at + * the specified interval. 
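+ *
+ * A minimal usage sketch, using the `private[sql]` factory defined below (the public API
+ * surface for enabling this trigger may differ):
+ * {{{
+ *   df.writeStream
+ *     .trigger(ContinuousTrigger("1 second"))
+ *     .start()
+ * }}}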
+ */ +@InterfaceStability.Evolving +case class ContinuousTrigger(intervalMs: Long) extends Trigger { + require(intervalMs >= 0, "the interval of trigger should not be negative") +} + +private[sql] object ContinuousTrigger { + def apply(interval: String): ContinuousTrigger = { + if (StringUtils.isBlank(interval)) { + throw new IllegalArgumentException( + "interval cannot be null or blank.") + } + val cal = if (interval.startsWith("interval")) { + CalendarInterval.fromString(interval) + } else { + CalendarInterval.fromString("interval " + interval) + } + if (cal == null) { + throw new IllegalArgumentException(s"Invalid interval: $interval") + } + if (cal.months > 0) { + throw new IllegalArgumentException(s"Doesn't support month or year interval: $interval") + } + new ContinuousTrigger(cal.microseconds / 1000) + } + + def apply(interval: Duration): ContinuousTrigger = { + ContinuousTrigger(interval.toMillis) + } + + def create(interval: String): ContinuousTrigger = { + apply(interval) + } + + def create(interval: Long, unit: TimeUnit): ContinuousTrigger = { + ContinuousTrigger(unit.toMillis(interval)) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala new file mode 100644 index 0000000000000..98017c3ac6a33 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming.continuous + +import java.util.concurrent.atomic.AtomicLong + +import scala.collection.mutable + +import org.apache.spark.SparkEnv +import org.apache.spark.internal.Logging +import org.apache.spark.rpc.{RpcCallContext, RpcEndpointRef, RpcEnv, ThreadSafeRpcEndpoint} +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.execution.streaming.StreamingQueryWrapper +import org.apache.spark.sql.sources.v2.streaming.reader.{ContinuousReader, PartitionOffset} +import org.apache.spark.sql.sources.v2.streaming.writer.ContinuousWriter +import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage +import org.apache.spark.util.RpcUtils + +private[continuous] sealed trait EpochCoordinatorMessage extends Serializable + +// Driver epoch trigger message +/** + * Atomically increment the current epoch and get the new value. + */ +private[sql] case object IncrementAndGetEpoch extends EpochCoordinatorMessage + +// Init messages +/** + * Set the reader and writer partition counts. Tasks may not be started until the coordinator + * has acknowledged these messages. 
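+ *
+ * (No epoch can be resolved as committed until both partition counts have been registered.)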
+ */ +private[sql] case class SetReaderPartitions(numPartitions: Int) extends EpochCoordinatorMessage +case class SetWriterPartitions(numPartitions: Int) extends EpochCoordinatorMessage + +// Partition task messages +/** + * Get the current epoch. + */ +private[sql] case object GetCurrentEpoch extends EpochCoordinatorMessage +/** + * Commit a partition at the specified epoch with the given message. + */ +private[sql] case class CommitPartitionEpoch( + partitionId: Int, + epoch: Long, + message: WriterCommitMessage) extends EpochCoordinatorMessage +/** + * Report that a partition is ending the specified epoch at the specified offset. + */ +private[sql] case class ReportPartitionOffset( + partitionId: Int, + epoch: Long, + offset: PartitionOffset) extends EpochCoordinatorMessage + + +/** Helper object used to create reference to [[EpochCoordinator]]. */ +private[sql] object EpochCoordinatorRef extends Logging { + private def endpointName(runId: String) = s"EpochCoordinator-$runId" + + /** + * Create a reference to a new [[EpochCoordinator]]. + */ + def create( + writer: ContinuousWriter, + reader: ContinuousReader, + query: ContinuousExecution, + startEpoch: Long, + session: SparkSession, + env: SparkEnv): RpcEndpointRef = synchronized { + val coordinator = new EpochCoordinator( + writer, reader, query, startEpoch, session, env.rpcEnv) + val ref = env.rpcEnv.setupEndpoint(endpointName(query.runId.toString()), coordinator) + logInfo("Registered EpochCoordinator endpoint") + ref + } + + def get(runId: String, env: SparkEnv): RpcEndpointRef = synchronized { + val rpcEndpointRef = RpcUtils.makeDriverRef(endpointName(runId), env.conf, env.rpcEnv) + logDebug("Retrieved existing EpochCoordinator endpoint") + rpcEndpointRef + } +} + +/** + * Handles three major epoch coordination tasks for continuous processing: + * + * * Maintains a local epoch counter (the "driver epoch"), incremented by IncrementAndGetEpoch + * and pollable from executors by GetCurrentEpoch. Note that this epoch is *not* immediately + * reflected anywhere in ContinuousExecution. + * * Collates ReportPartitionOffset messages, and forwards to ContinuousExecution when all + * readers have ended a given epoch. + * * Collates CommitPartitionEpoch messages, and forwards to ContinuousExecution when all readers + * have both committed and reported an end offset for a given epoch. + */ +private[continuous] class EpochCoordinator( + writer: ContinuousWriter, + reader: ContinuousReader, + query: ContinuousExecution, + startEpoch: Long, + session: SparkSession, + override val rpcEnv: RpcEnv) + extends ThreadSafeRpcEndpoint with Logging { + + private var numReaderPartitions: Int = _ + private var numWriterPartitions: Int = _ + + private var currentDriverEpoch = startEpoch + + // (epoch, partition) -> message + private val partitionCommits = + mutable.Map[(Long, Int), WriterCommitMessage]() + // (epoch, partition) -> offset + private val partitionOffsets = + mutable.Map[(Long, Int), PartitionOffset]() + + private def resolveCommitsAtEpoch(epoch: Long) = { + val thisEpochCommits = + partitionCommits.collect { case ((e, _), msg) if e == epoch => msg } + val nextEpochOffsets = + partitionOffsets.collect { case ((e, _), o) if e == epoch => o } + + if (thisEpochCommits.size == numWriterPartitions && + nextEpochOffsets.size == numReaderPartitions) { + logDebug(s"Epoch $epoch has received commits from all partitions. Committing globally.") + // Sequencing is important here. 
We must commit to the writer before recording the commit
+      // in the query, or we will end up dropping the commit if we restart in the middle.
+      writer.commit(epoch, thisEpochCommits.toArray)
+      query.commit(epoch)
+
+      // Cleanup state from before this epoch, now that we know all partitions are forever past it.
+      for (k <- partitionCommits.keys.filter { case (e, _) => e < epoch }) {
+        partitionCommits.remove(k)
+      }
+      for (k <- partitionOffsets.keys.filter { case (e, _) => e < epoch }) {
+        partitionOffsets.remove(k)
+      }
+    }
+  }
+
+  override def receive: PartialFunction[Any, Unit] = {
+    case CommitPartitionEpoch(partitionId, epoch, message) =>
+      logDebug(s"Got commit from partition $partitionId at epoch $epoch: $message")
+      if (!partitionCommits.isDefinedAt((epoch, partitionId))) {
+        partitionCommits.put((epoch, partitionId), message)
+        resolveCommitsAtEpoch(epoch)
+      }
+
+    case ReportPartitionOffset(partitionId, epoch, offset) =>
+      partitionOffsets.put((epoch, partitionId), offset)
+      val thisEpochOffsets =
+        partitionOffsets.collect { case ((e, _), o) if e == epoch => o }
+      if (thisEpochOffsets.size == numReaderPartitions) {
+        logDebug(s"Epoch $epoch has offsets reported from all partitions: $thisEpochOffsets")
+        query.addOffset(epoch, reader, thisEpochOffsets.toSeq)
+        resolveCommitsAtEpoch(epoch)
+      }
+  }
+
+  override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
+    case GetCurrentEpoch =>
+      val result = currentDriverEpoch
+      logDebug(s"Epoch $result")
+      context.reply(result)
+
+    case IncrementAndGetEpoch =>
+      currentDriverEpoch += 1
+      context.reply(currentDriverEpoch)
+
+    case SetReaderPartitions(numPartitions) =>
+      numReaderPartitions = numPartitions
+      context.reply(())
+
+    case SetWriterPartitions(numPartitions) =>
+      numWriterPartitions = numPartitions
+      context.reply(())
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamSourceV2.scala
new file mode 100644
index 0000000000000..c0ed12cec25ef
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamSourceV2.scala
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.spark.sql.execution.streaming.sources + +import java.util.Optional + +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import org.json4s.DefaultFormats +import org.json4s.jackson.Serialization + +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.execution.streaming.{RateStreamOffset, ValueRunTimeMsPair} +import org.apache.spark.sql.sources.DataSourceRegister +import org.apache.spark.sql.sources.v2.{DataSourceV2, DataSourceV2Options} +import org.apache.spark.sql.sources.v2.reader._ +import org.apache.spark.sql.sources.v2.streaming.MicroBatchReadSupport +import org.apache.spark.sql.sources.v2.streaming.reader.{MicroBatchReader, Offset} +import org.apache.spark.sql.types.{LongType, StructField, StructType, TimestampType} +import org.apache.spark.util.{ManualClock, SystemClock} + +/** + * This is a temporary register as we build out v2 migration. Microbatch read support should + * be implemented in the same register as v1. + */ +class RateSourceProviderV2 extends DataSourceV2 with MicroBatchReadSupport with DataSourceRegister { + override def createMicroBatchReader( + schema: Optional[StructType], + checkpointLocation: String, + options: DataSourceV2Options): MicroBatchReader = { + new RateStreamMicroBatchReader(options) + } + + override def shortName(): String = "ratev2" +} + +class RateStreamMicroBatchReader(options: DataSourceV2Options) + extends MicroBatchReader { + implicit val defaultFormats: DefaultFormats = DefaultFormats + + val clock = { + // The option to use a manual clock is provided only for unit testing purposes. + if (options.get("useManualClock").orElse("false").toBoolean) new ManualClock + else new SystemClock + } + + private val numPartitions = + options.get(RateStreamSourceV2.NUM_PARTITIONS).orElse("5").toInt + private val rowsPerSecond = + options.get(RateStreamSourceV2.ROWS_PER_SECOND).orElse("6").toLong + + // The interval (in milliseconds) between rows in each partition. + // e.g. if there are 4 global rows per second, and 2 partitions, each partition + // should output rows every (1000 * 2 / 4) = 500 ms. + private val msPerPartitionBetweenRows = (1000 * numPartitions) / rowsPerSecond + + override def readSchema(): StructType = { + StructType( + StructField("timestamp", TimestampType, false) :: + StructField("value", LongType, false) :: Nil) + } + + val creationTimeMs = clock.getTimeMillis() + + private var start: RateStreamOffset = _ + private var end: RateStreamOffset = _ + + override def setOffsetRange( + start: Optional[Offset], + end: Optional[Offset]): Unit = { + this.start = start.orElse( + RateStreamSourceV2.createInitialOffset(numPartitions, creationTimeMs)) + .asInstanceOf[RateStreamOffset] + + this.end = end.orElse { + val currentTime = clock.getTimeMillis() + RateStreamOffset( + this.start.partitionToValueAndRunTimeMs.map { + case startOffset @ (part, ValueRunTimeMsPair(currentVal, currentReadTime)) => + // Calculate the number of rows we should advance in this partition (based on the + // current time), and output a corresponding offset. 
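+              // For example, if 5000 ms have elapsed and msPerPartitionBetweenRows is 500, the
+              // partition advances by 10 rows, i.e. its value grows by 10 * numPartitions.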
+ val readInterval = currentTime - currentReadTime + val numNewRows = readInterval / msPerPartitionBetweenRows + if (numNewRows <= 0) { + startOffset + } else { + (part, ValueRunTimeMsPair( + currentVal + (numNewRows * numPartitions), + currentReadTime + (numNewRows * msPerPartitionBetweenRows))) + } + } + ) + }.asInstanceOf[RateStreamOffset] + } + + override def getStartOffset(): Offset = { + if (start == null) throw new IllegalStateException("start offset not set") + start + } + override def getEndOffset(): Offset = { + if (end == null) throw new IllegalStateException("end offset not set") + end + } + + override def deserializeOffset(json: String): Offset = { + RateStreamOffset(Serialization.read[Map[Int, ValueRunTimeMsPair]](json)) + } + + override def createReadTasks(): java.util.List[ReadTask[Row]] = { + val startMap = start.partitionToValueAndRunTimeMs + val endMap = end.partitionToValueAndRunTimeMs + endMap.keys.toSeq.map { part => + val ValueRunTimeMsPair(endVal, _) = endMap(part) + val ValueRunTimeMsPair(startVal, startTimeMs) = startMap(part) + + val packedRows = mutable.ListBuffer[(Long, Long)]() + var outVal = startVal + numPartitions + var outTimeMs = startTimeMs + while (outVal <= endVal) { + packedRows.append((outTimeMs, outVal)) + outVal += numPartitions + outTimeMs += msPerPartitionBetweenRows + } + + RateStreamBatchTask(packedRows).asInstanceOf[ReadTask[Row]] + }.toList.asJava + } + + override def commit(end: Offset): Unit = {} + override def stop(): Unit = {} +} + +case class RateStreamBatchTask(vals: Seq[(Long, Long)]) extends ReadTask[Row] { + override def createDataReader(): DataReader[Row] = new RateStreamBatchReader(vals) +} + +class RateStreamBatchReader(vals: Seq[(Long, Long)]) extends DataReader[Row] { + var currentIndex = -1 + + override def next(): Boolean = { + // Return true as long as the new index is in the seq. + currentIndex += 1 + currentIndex < vals.size + } + + override def get(): Row = { + Row( + DateTimeUtils.toJavaTimestamp(DateTimeUtils.fromMillis(vals(currentIndex)._1)), + vals(currentIndex)._2) + } + + override def close(): Unit = {} +} + +object RateStreamSourceV2 { + val NUM_PARTITIONS = "numPartitions" + val ROWS_PER_SECOND = "rowsPerSecond" + + private[sql] def createInitialOffset(numPartitions: Int, creationTimeMs: Long) = { + RateStreamOffset( + Range(0, numPartitions).map { i => + // Note that the starting offset is exclusive, so we have to decrement the starting value + // by the increment that will later be applied. The first row output in each + // partition will have a value equal to the partition index. + (i, + ValueRunTimeMsPair( + (i - numPartitions).toLong, + creationTimeMs)) + }.toMap) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memoryV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memoryV2.scala new file mode 100644 index 0000000000000..da7c31cf62428 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memoryV2.scala @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.streaming.sources
+
+import javax.annotation.concurrent.GuardedBy
+
+import scala.collection.mutable
+import scala.collection.mutable.ArrayBuffer
+import scala.util.control.NonFatal
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, Statistics}
+import org.apache.spark.sql.catalyst.streaming.InternalOutputModes.{Append, Complete, Update}
+import org.apache.spark.sql.execution.streaming.Sink
+import org.apache.spark.sql.sources.v2.{DataSourceV2, DataSourceV2Options}
+import org.apache.spark.sql.sources.v2.streaming.{ContinuousWriteSupport, MicroBatchWriteSupport}
+import org.apache.spark.sql.sources.v2.streaming.writer.ContinuousWriter
+import org.apache.spark.sql.sources.v2.writer._
+import org.apache.spark.sql.streaming.OutputMode
+import org.apache.spark.sql.types.StructType
+
+/**
+ * A sink that stores the results in memory. This [[Sink]] is primarily intended for use in unit
+ * tests and does not provide durability.
+ */
+class MemorySinkV2 extends DataSourceV2
+  with MicroBatchWriteSupport with ContinuousWriteSupport with Logging {
+
+  override def createMicroBatchWriter(
+      queryId: String,
+      batchId: Long,
+      schema: StructType,
+      mode: OutputMode,
+      options: DataSourceV2Options): java.util.Optional[DataSourceV2Writer] = {
+    java.util.Optional.of(new MemoryWriter(this, batchId, mode))
+  }
+
+  override def createContinuousWriter(
+      queryId: String,
+      schema: StructType,
+      mode: OutputMode,
+      options: DataSourceV2Options): java.util.Optional[ContinuousWriter] = {
+    java.util.Optional.of(new ContinuousMemoryWriter(this, mode))
+  }
+
+  private case class AddedData(batchId: Long, data: Array[Row])
+
+  /** An ordered list of batches that have been written to this [[Sink]]. */
+  @GuardedBy("this")
+  private val batches = new ArrayBuffer[AddedData]()
+
+  /** Returns all rows that are stored in this [[Sink]]. */
+  def allData: Seq[Row] = synchronized {
+    batches.flatMap(_.data)
+  }
+
+  def latestBatchId: Option[Long] = synchronized {
+    batches.lastOption.map(_.batchId)
+  }
+
+  def latestBatchData: Seq[Row] = synchronized {
+    batches.lastOption.toSeq.flatten(_.data)
+  }
+
+  def toDebugString: String = synchronized {
+    batches.map { case AddedData(batchId, data) =>
+      val dataStr = try data.mkString(" ") catch {
+        case NonFatal(e) => "[Error converting to string]"
+      }
+      s"$batchId: $dataStr"
+    }.mkString("\n")
+  }
+
+  def write(batchId: Long, outputMode: OutputMode, newRows: Array[Row]): Unit = {
+    val notCommitted = synchronized {
+      latestBatchId.isEmpty || batchId > latestBatchId.get
+    }
+    if (notCommitted) {
+      logDebug(s"Committing batch $batchId to $this")
+      outputMode match {
+        case Append | Update =>
+          val rows = AddedData(batchId, newRows)
+          synchronized { batches += rows }
+
+        case Complete =>
+          val rows = AddedData(batchId, newRows)
+          synchronized {
+            batches.clear()
+            batches += rows
+          }
+
+        case _ =>
+          throw new IllegalArgumentException(
+            s"Output mode $outputMode is not supported by MemorySinkV2")
+      }
+    } else {
+      logDebug(s"Skipping already committed batch: $batchId")
+    }
+  }
+
+  def clear(): Unit = synchronized {
+    batches.clear()
+  }
+
+  override def toString(): String = "MemorySinkV2"
+}
+
+case class MemoryWriterCommitMessage(partition: Int, data: Seq[Row]) extends WriterCommitMessage {}
+
+class MemoryWriter(sink: MemorySinkV2, batchId: Long, outputMode: OutputMode)
+  extends DataSourceV2Writer with Logging {
+
+  override def createWriterFactory: MemoryWriterFactory = MemoryWriterFactory(outputMode)
+
+  def commit(messages: Array[WriterCommitMessage]): Unit = {
+    val newRows = messages.flatMap {
+      case message: MemoryWriterCommitMessage => message.data
+    }
+    sink.write(batchId, outputMode, newRows)
+  }
+
+  override def abort(messages: Array[WriterCommitMessage]): Unit = {
+    // Don't accept any of the new input.
+  }
+}
+
+class ContinuousMemoryWriter(val sink: MemorySinkV2, outputMode: OutputMode)
+  extends ContinuousWriter {
+
+  override def createWriterFactory: MemoryWriterFactory = MemoryWriterFactory(outputMode)
+
+  override def commit(epochId: Long, messages: Array[WriterCommitMessage]): Unit = {
+    val newRows = messages.flatMap {
+      case message: MemoryWriterCommitMessage => message.data
+    }
+    sink.write(epochId, outputMode, newRows)
+  }
+
+  override def abort(messages: Array[WriterCommitMessage]): Unit = {
+    // Don't accept any of the new input.
+  }
+}
+
+case class MemoryWriterFactory(outputMode: OutputMode) extends DataWriterFactory[Row] {
+  def createDataWriter(partitionId: Int, attemptNumber: Int): DataWriter[Row] = {
+    new MemoryDataWriter(partitionId, outputMode)
+  }
+}
+
+class MemoryDataWriter(partition: Int, outputMode: OutputMode)
+  extends DataWriter[Row] with Logging {
+
+  private val data = mutable.Buffer[Row]()
+
+  override def write(row: Row): Unit = {
+    data.append(row)
+  }
+
+  override def commit(): MemoryWriterCommitMessage = {
+    val msg = MemoryWriterCommitMessage(partition, data.clone())
+    data.clear()
+    msg
+  }
+
+  override def abort(): Unit = {}
+}
+
+
+/**
+ * Used to query the data that has been written into a [[MemorySinkV2]].
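+ *
+ * Statistics are estimated as the default size of a row times the number of rows currently
+ * held by the sink.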
+ */ +case class MemoryPlanV2(sink: MemorySinkV2, override val output: Seq[Attribute]) extends LeafNode { + private val sizePerRow = output.map(_.dataType.defaultSize).sum + + override def computeStats(): Statistics = Statistics(sizePerRow * sink.allData.size) +} + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithState_StateManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithState_StateManager.scala deleted file mode 100644 index e49546830286b..0000000000000 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithState_StateManager.scala +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution.streaming.state - -import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, BoundReference, CaseWhen, CreateNamedStruct, GetStructField, IsNull, Literal, UnsafeRow} -import org.apache.spark.sql.execution.ObjectOperator -import org.apache.spark.sql.execution.streaming.GroupStateImpl -import org.apache.spark.sql.execution.streaming.GroupStateImpl.NO_TIMESTAMP -import org.apache.spark.sql.types.{IntegerType, LongType, StructType} - - -/** - * Class to serialize/write/read/deserialize state for - * [[org.apache.spark.sql.execution.streaming.FlatMapGroupsWithStateExec]]. 
- */ -class FlatMapGroupsWithState_StateManager( - stateEncoder: ExpressionEncoder[Any], - shouldStoreTimestamp: Boolean) extends Serializable { - - /** Schema of the state rows saved in the state store */ - val stateSchema = { - val schema = new StructType().add("groupState", stateEncoder.schema, nullable = true) - if (shouldStoreTimestamp) schema.add("timeoutTimestamp", LongType) else schema - } - - /** Get deserialized state and corresponding timeout timestamp for a key */ - def getState(store: StateStore, keyRow: UnsafeRow): FlatMapGroupsWithState_StateData = { - val stateRow = store.get(keyRow) - stateDataForGets.withNew( - keyRow, stateRow, getStateObj(stateRow), getTimestamp(stateRow)) - } - - /** Put state and timeout timestamp for a key */ - def putState(store: StateStore, keyRow: UnsafeRow, state: Any, timestamp: Long): Unit = { - val stateRow = getStateRow(state) - setTimestamp(stateRow, timestamp) - store.put(keyRow, stateRow) - } - - /** Removed all information related to a key */ - def removeState(store: StateStore, keyRow: UnsafeRow): Unit = { - store.remove(keyRow) - } - - /** Get all the keys and corresponding state rows in the state store */ - def getAllState(store: StateStore): Iterator[FlatMapGroupsWithState_StateData] = { - val stateDataForGetAllState = FlatMapGroupsWithState_StateData() - store.getRange(None, None).map { pair => - stateDataForGetAllState.withNew( - pair.key, pair.value, getStateObjFromRow(pair.value), getTimestamp(pair.value)) - } - } - - // Ordinals of the information stored in the state row - private lazy val nestedStateOrdinal = 0 - private lazy val timeoutTimestampOrdinal = 1 - - // Get the serializer for the state, taking into account whether we need to save timestamps - private val stateSerializer = { - val nestedStateExpr = CreateNamedStruct( - stateEncoder.namedExpressions.flatMap(e => Seq(Literal(e.name), e))) - if (shouldStoreTimestamp) { - Seq(nestedStateExpr, Literal(GroupStateImpl.NO_TIMESTAMP)) - } else { - Seq(nestedStateExpr) - } - } - - // Get the deserializer for the state. Note that this must be done in the driver, as - // resolving and binding of deserializer expressions to the encoded type can be safely done - // only in the driver. 
- private val stateDeserializer = { - val boundRefToNestedState = BoundReference(nestedStateOrdinal, stateEncoder.schema, true) - val deser = stateEncoder.resolveAndBind().deserializer.transformUp { - case BoundReference(ordinal, _, _) => GetStructField(boundRefToNestedState, ordinal) - } - CaseWhen(Seq(IsNull(boundRefToNestedState) -> Literal(null)), elseValue = deser) - } - - // Converters for translating state between rows and Java objects - private lazy val getStateObjFromRow = ObjectOperator.deserializeRowToObject( - stateDeserializer, stateSchema.toAttributes) - private lazy val getStateRowFromObj = ObjectOperator.serializeObjectToRow(stateSerializer) - - // Reusable instance for returning state information - private lazy val stateDataForGets = FlatMapGroupsWithState_StateData() - - /** Returns the state as Java object if defined */ - private def getStateObj(stateRow: UnsafeRow): Any = { - if (stateRow == null) null - else getStateObjFromRow(stateRow) - } - - /** Returns the row for an updated state */ - private def getStateRow(obj: Any): UnsafeRow = { - val row = getStateRowFromObj(obj) - if (obj == null) { - row.setNullAt(nestedStateOrdinal) - } - row - } - - /** Returns the timeout timestamp of a state row is set */ - private def getTimestamp(stateRow: UnsafeRow): Long = { - if (shouldStoreTimestamp && stateRow != null) { - stateRow.getLong(timeoutTimestampOrdinal) - } else NO_TIMESTAMP - } - - /** Set the timestamp in a state row */ - private def setTimestamp(stateRow: UnsafeRow, timeoutTimestamps: Long): Unit = { - if (shouldStoreTimestamp) stateRow.setLong(timeoutTimestampOrdinal, timeoutTimestamps) - } -} - -/** - * Class to capture deserialized state and timestamp return by the state manager. - * This is intended for reuse. - */ -case class FlatMapGroupsWithState_StateData( - var keyRow: UnsafeRow = null, - var stateRow: UnsafeRow = null, - var stateObj: Any = null, - var timeoutTimestamp: Long = -1) { - def withNew( - newKeyRow: UnsafeRow, - newStateRow: UnsafeRow, - newStateObj: Any, - newTimeout: Long): this.type = { - keyRow = newKeyRow - stateRow = newStateRow - stateObj = newStateObj - timeoutTimestamp = newTimeout - this - } -} - diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala index 43cec4807ae4d..73a105266e1c1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala @@ -27,17 +27,14 @@ import org.apache.spark.internal.Logging import org.apache.spark.scheduler._ import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.execution.metric._ -import org.apache.spark.status.LiveEntity +import org.apache.spark.sql.internal.StaticSQLConf._ +import org.apache.spark.status.{ElementTrackingStore, KVUtils, LiveEntity} import org.apache.spark.status.config._ -import org.apache.spark.ui.SparkUI -import org.apache.spark.util.kvstore.KVStore -private[sql] class SQLAppStatusListener( +class SQLAppStatusListener( conf: SparkConf, - kvstore: KVStore, - live: Boolean, - ui: Option[SparkUI] = None) - extends SparkListener with Logging { + kvstore: ElementTrackingStore, + live: Boolean) extends SparkListener with Logging { // How often to flush intermediate state of a live execution to the store. When replaying logs, // never flush (only do the very last write). 
@@ -49,7 +46,27 @@ private[sql] class SQLAppStatusListener(
   private val liveExecutions = new ConcurrentHashMap[Long, LiveExecutionData]()
   private val stageMetrics = new ConcurrentHashMap[Int, LiveStageMetrics]()
 
-  private var uiInitialized = false
+  // Returns true if this listener has no live data. Exposed for tests only.
+  private[sql] def noLiveData(): Boolean = {
+    liveExecutions.isEmpty && stageMetrics.isEmpty
+  }
+
+  kvstore.addTrigger(classOf[SQLExecutionUIData], conf.get(UI_RETAINED_EXECUTIONS)) { count =>
+    cleanupExecutions(count)
+  }
+
+  kvstore.onFlush {
+    if (!live) {
+      val now = System.nanoTime()
+      liveExecutions.values.asScala.foreach { exec =>
+        // This saves the partial aggregated metrics to the store; this works currently because
+        // when the SHS sees an updated event log, all old data for the application is thrown
+        // away.
+        exec.metricsValues = aggregateMetrics(exec)
+        exec.write(kvstore, now)
+      }
+    }
+  }
 
   override def onJobStart(event: SparkListenerJobStart): Unit = {
     val executionIdString = event.properties.getProperty(SQLExecution.EXECUTION_ID_KEY)
@@ -70,7 +87,7 @@ private[sql] class SQLAppStatusListener(
     }
 
     exec.jobs = exec.jobs + (jobId -> JobExecutionStatus.RUNNING)
-    exec.stages = event.stageIds.toSet
+    exec.stages ++= event.stageIds.toSet
     update(exec)
   }
 
@@ -82,7 +99,7 @@ private[sql] class SQLAppStatusListener(
     // Reset the metrics tracking object for the new attempt.
     Option(stageMetrics.get(event.stageInfo.stageId)).foreach { metrics =>
       metrics.taskMetrics.clear()
-      metrics.attemptId = event.stageInfo.attemptId
+      metrics.attemptId = event.stageInfo.attemptNumber
     }
   }
 
@@ -158,7 +175,7 @@ private[sql] class SQLAppStatusListener(
 
     // Check the execution again for whether the aggregated metrics data has been calculated.
     // This can happen if the UI is requesting this data, and the onExecutionEnd handler is
-    // running at the same time. The metrics calculcated for the UI can be innacurate in that
+    // running at the same time. The metrics calculated for the UI can be inaccurate in that
    // case, since the onExecutionEnd handler will clean up tracked stage metrics.
     if (exec.metricsValues != null) {
       exec.metricsValues
@@ -212,14 +229,6 @@ private[sql] class SQLAppStatusListener(
   }
 
   private def onExecutionStart(event: SparkListenerSQLExecutionStart): Unit = {
-    // Install the SQL tab in a live app if it hasn't been initialized yet.
- if (!uiInitialized) { - ui.foreach { _ui => - new SQLTab(new SQLAppStatusStore(kvstore, Some(this)), _ui) - } - uiInitialized = true - } - val SparkListenerSQLExecutionStart(executionId, description, details, physicalPlanDescription, sparkPlanInfo, time) = event @@ -317,6 +326,17 @@ private[sql] class SQLAppStatusListener( } } + private def cleanupExecutions(count: Long): Unit = { + val countToDelete = count - conf.get(UI_RETAINED_EXECUTIONS) + if (countToDelete <= 0) { + return + } + + val toDelete = KVUtils.viewToSeq(kvstore.view(classOf[SQLExecutionUIData]), + countToDelete.toInt) { e => e.completionTime.isDefined } + toDelete.foreach { e => kvstore.delete(e.getClass(), e.executionId) } + } + } private class LiveExecutionData(val executionId: Long) extends LiveEntity { @@ -360,7 +380,7 @@ private class LiveStageMetrics( val accumulatorIds: Array[Long], val taskMetrics: ConcurrentHashMap[Long, LiveTaskMetrics]) -private[sql] class LiveTaskMetrics( +private class LiveTaskMetrics( val ids: Array[Long], val values: Array[Long], val succeeded: Boolean) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusStore.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusStore.scala index 586d3ae411c74..910f2e52fdbb3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusStore.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusStore.scala @@ -25,21 +25,17 @@ import scala.collection.mutable.ArrayBuffer import com.fasterxml.jackson.databind.annotation.JsonDeserialize -import org.apache.spark.{JobExecutionStatus, SparkConf} -import org.apache.spark.scheduler.SparkListener -import org.apache.spark.status.AppStatusPlugin +import org.apache.spark.JobExecutionStatus import org.apache.spark.status.KVUtils.KVIndexParam -import org.apache.spark.ui.SparkUI -import org.apache.spark.util.Utils import org.apache.spark.util.kvstore.KVStore /** * Provides a view of a KVStore with methods that make it easy to query SQL-specific state. There's * no state kept in this class, so it's ok to have multiple instances of it in an application. */ -private[sql] class SQLAppStatusStore( +class SQLAppStatusStore( store: KVStore, - listener: Option[SQLAppStatusListener] = None) { + val listener: Option[SQLAppStatusListener] = None) { def executionsList(): Seq[SQLExecutionUIData] = { store.view(classOf[SQLExecutionUIData]).asScala.toSeq @@ -74,47 +70,9 @@ private[sql] class SQLAppStatusStore( def planGraph(executionId: Long): SparkPlanGraph = { store.read(classOf[SparkPlanGraphWrapper], executionId).toSparkPlanGraph() } - -} - -/** - * An AppStatusPlugin for handling the SQL UI and listeners. - */ -private[sql] class SQLAppStatusPlugin extends AppStatusPlugin { - - override def setupListeners( - conf: SparkConf, - store: KVStore, - addListenerFn: SparkListener => Unit, - live: Boolean): Unit = { - // For live applications, the listener is installed in [[setupUI]]. This also avoids adding - // the listener when the UI is disabled. Force installation during testing, though. - if (!live || Utils.isTesting) { - val listener = new SQLAppStatusListener(conf, store, live, None) - addListenerFn(listener) - } - } - - override def setupUI(ui: SparkUI): Unit = { - ui.sc match { - case Some(sc) => - // If this is a live application, then install a listener that will enable the SQL - // tab as soon as there's a SQL event posted to the bus. 
- val listener = new SQLAppStatusListener(sc.conf, ui.store.store, true, Some(ui)) - sc.listenerBus.addToStatusQueue(listener) - - case _ => - // For a replayed application, only add the tab if the store already contains SQL data. - val sqlStore = new SQLAppStatusStore(ui.store.store) - if (sqlStore.executionsCount() > 0) { - new SQLTab(sqlStore, ui) - } - } - } - } -private[sql] class SQLExecutionUIData( +class SQLExecutionUIData( @KVIndexParam val executionId: Long, val description: String, val details: String, @@ -132,10 +90,9 @@ private[sql] class SQLExecutionUIData( * from the SQL listener instance. */ @JsonDeserialize(keyAs = classOf[JLong]) - val metricValues: Map[Long, String] - ) + val metricValues: Map[Long, String]) -private[sql] class SparkPlanGraphWrapper( +class SparkPlanGraphWrapper( @KVIndexParam val executionId: Long, val nodes: Seq[SparkPlanGraphNodeWrapper], val edges: Seq[SparkPlanGraphEdge]) { @@ -146,7 +103,7 @@ private[sql] class SparkPlanGraphWrapper( } -private[sql] class SparkPlanGraphClusterWrapper( +class SparkPlanGraphClusterWrapper( val id: Long, val name: String, val desc: String, @@ -162,7 +119,7 @@ private[sql] class SparkPlanGraphClusterWrapper( } /** Only one of the values should be set. */ -private[sql] class SparkPlanGraphNodeWrapper( +class SparkPlanGraphNodeWrapper( val node: SparkPlanGraphNode, val cluster: SparkPlanGraphClusterWrapper) { @@ -173,7 +130,7 @@ private[sql] class SparkPlanGraphNodeWrapper( } -private[sql] case class SQLPlanMetric( +case class SQLPlanMetric( name: String, accumulatorId: Long, metricType: String) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLHistoryServerPlugin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLHistoryServerPlugin.scala new file mode 100644 index 0000000000000..522d0cf79bffa --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLHistoryServerPlugin.scala @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.ui + +import org.apache.spark.SparkConf +import org.apache.spark.scheduler.SparkListener +import org.apache.spark.status.{AppHistoryServerPlugin, ElementTrackingStore} +import org.apache.spark.ui.SparkUI + +class SQLHistoryServerPlugin extends AppHistoryServerPlugin { + override def createListeners(conf: SparkConf, store: ElementTrackingStore): Seq[SparkListener] = { + Seq(new SQLAppStatusListener(conf, store, live = false)) + } + + override def setupUI(ui: SparkUI): Unit = { + val sqlStatusStore = new SQLAppStatusStore(ui.store.store) + if (sqlStatusStore.executionsCount() > 0) { + new SQLTab(sqlStatusStore, ui) + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala index 058c38c8cb8f4..1e076207bc607 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala @@ -86,7 +86,7 @@ abstract class Aggregator[-IN, BUF, OUT] extends Serializable { def bufferEncoder: Encoder[BUF] /** - * Specifies the `Encoder` for the final ouput value type. + * Specifies the `Encoder` for the final output value type. * @since 2.0.0 */ def outputEncoder: Encoder[OUT] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala index 03b654f830520..40a058d2cadd2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala @@ -66,6 +66,7 @@ case class UserDefinedFunction protected[sql] ( * * @since 1.3.0 */ + @scala.annotation.varargs def apply(exprs: Column*): Column = { Column(ScalaUDF( f, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 3e4659b9eae60..0d11682d80a3c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -24,6 +24,7 @@ import scala.util.Try import scala.util.control.NonFatal import org.apache.spark.annotation.InterfaceStability +import org.apache.spark.sql.api.java._ import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.catalyst.analysis.{Star, UnresolvedFunction} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder @@ -32,7 +33,6 @@ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.logical.{HintInfo, ResolvedHint} import org.apache.spark.sql.execution.SparkSqlParser import org.apache.spark.sql.expressions.UserDefinedFunction -import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -2171,7 +2171,8 @@ object functions { def base64(e: Column): Column = withExpr { Base64(e.expr) } /** - * Concatenates multiple input string columns together into a single string column. + * Concatenates multiple input columns together into a single column. + * If all inputs are binary, concat returns an output as binary. Otherwise, it returns as string. 
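+   *
+   * A short example (hypothetical column names, assuming `spark.implicits._` is in scope):
+   * {{{
+   *   df.select(concat($"first_name", lit(" "), $"last_name"))
+   * }}}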
    *
    * @group string_funcs
    * @since 1.5.0
@@ -2797,6 +2798,21 @@ object functions {
     TruncDate(date.expr, Literal(format))
   }
 
+  /**
+   * Returns timestamp truncated to the unit specified by the format.
+   *
+   * @param format 'year', 'yyyy', 'yy' for truncate by year,
+   *               'month', 'mon', 'mm' for truncate by month,
+   *               'day', 'dd' for truncate by day.
+   *               Other options are: 'second', 'minute', 'hour', 'week', 'quarter'
+   *
+   * @group datetime_funcs
+   * @since 2.3.0
+   */
+  def date_trunc(format: String, timestamp: Column): Column = withExpr {
+    TruncTimestamp(Literal(format), timestamp.expr)
+  }
+
   /**
    * Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in UTC, and renders
    * that time as a timestamp in the given time zone. For example, 'GMT+1' would yield
@@ -3238,42 +3254,66 @@ object functions {
    */
   def map_values(e: Column): Column = withExpr { MapValues(e.expr) }
 
-  //////////////////////////////////////////////////////////////////////////////////////////////
-  //////////////////////////////////////////////////////////////////////////////////////////////
-
   // scalastyle:off line.size.limit
   // scalastyle:off parameter.number
 
   /* Use the following code to generate:
-  (0 to 10).map { x =>
+
+  (0 to 10).foreach { x =>
     val types = (1 to x).foldRight("RT")((i, s) => {s"A$i, $s"})
     val typeTags = (1 to x).map(i => s"A$i: TypeTag").foldLeft("RT: TypeTag")(_ + ", " + _)
     val inputTypes = (1 to x).foldRight("Nil")((i, s) => {s"ScalaReflection.schemaFor(typeTag[A$i]).dataType :: $s"})
     println(s"""
-    /**
-     * Defines a deterministic user-defined function of ${x} arguments as user-defined
-     * function (UDF). The data types are automatically inferred based on the function's
-     * signature. To change a UDF to nondeterministic, call the API
-     * `UserDefinedFunction.asNondeterministic()`.
-     *
-     * @group udf_funcs
-     * @since 1.3.0
-     */
-    def udf[$typeTags](f: Function$x[$types]): UserDefinedFunction = {
-      val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT]
-      val inputTypes = Try($inputTypes).toOption
-      val udf = UserDefinedFunction(f, dataType, inputTypes)
-      if (nullable) udf else udf.asNonNullable()
-    }""")
+      |/**
+      | * Defines a Scala closure of $x arguments as user-defined function (UDF).
+      | * The data types are automatically inferred based on the Scala closure's
+      | * signature. By default the returned UDF is deterministic. To change it to
+      | * nondeterministic, call the API `UserDefinedFunction.asNondeterministic()`.
+      | *
+      | * @group udf_funcs
+      | * @since 1.3.0
+      | */
+      |def udf[$typeTags](f: Function$x[$types]): UserDefinedFunction = {
+      |  val ScalaReflection.Schema(dataType, nullable) = ScalaReflection.schemaFor[RT]
+      |  val inputTypes = Try($inputTypes).toOption
+      |  val udf = UserDefinedFunction(f, dataType, inputTypes)
+      |  if (nullable) udf else udf.asNonNullable()
+      |}""".stripMargin)
+  }
+
+  (0 to 10).foreach { i =>
+    val extTypeArgs = (0 to i).map(_ => "_").mkString(", ")
+    val anyTypeArgs = (0 to i).map(_ => "Any").mkString(", ")
+    val anyCast = s".asInstanceOf[UDF$i[$anyTypeArgs]]"
+    val anyParams = (1 to i).map(_ => "_: Any").mkString(", ")
+    val funcCall = if (i == 0) "() => func" else "func"
+    println(s"""
+      |/**
+      | * Defines a Java UDF$i instance as user-defined function (UDF).
+      | * The caller must specify the output data type, and there is no automatic input type coercion.
+      | * By default the returned UDF is deterministic. To change it to nondeterministic, call the
+      | * API `UserDefinedFunction.asNondeterministic()`.
+ | * + | * @group udf_funcs + | * @since 2.3.0 + | */ + |def udf(f: UDF$i[$extTypeArgs], returnType: DataType): UserDefinedFunction = { + | val func = f$anyCast.call($anyParams) + | UserDefinedFunction($funcCall, returnType, inputTypes = None) + |}""".stripMargin) } */ + ////////////////////////////////////////////////////////////////////////////////////////////// + // Scala UDF functions + ////////////////////////////////////////////////////////////////////////////////////////////// + /** - * Defines a deterministic user-defined function of 0 arguments as user-defined - * function (UDF). The data types are automatically inferred based on the function's - * signature. To change a UDF to nondeterministic, call the API - * `UserDefinedFunction.asNondeterministic()`. + * Defines a Scala closure of 0 arguments as user-defined function (UDF). + * The data types are automatically inferred based on the Scala closure's + * signature. By default the returned UDF is deterministic. To change it to + * nondeterministic, call the API `UserDefinedFunction.asNondeterministic()`. * * @group udf_funcs * @since 1.3.0 @@ -3286,10 +3326,10 @@ object functions { } /** - * Defines a deterministic user-defined function of 1 arguments as user-defined - * function (UDF). The data types are automatically inferred based on the function's - * signature. To change a UDF to nondeterministic, call the API - * `UserDefinedFunction.asNondeterministic()`. + * Defines a Scala closure of 1 arguments as user-defined function (UDF). + * The data types are automatically inferred based on the Scala closure's + * signature. By default the returned UDF is deterministic. To change it to + * nondeterministic, call the API `UserDefinedFunction.asNondeterministic()`. * * @group udf_funcs * @since 1.3.0 @@ -3302,10 +3342,10 @@ object functions { } /** - * Defines a deterministic user-defined function of 2 arguments as user-defined - * function (UDF). The data types are automatically inferred based on the function's - * signature. To change a UDF to nondeterministic, call the API - * `UserDefinedFunction.asNondeterministic()`. + * Defines a Scala closure of 2 arguments as user-defined function (UDF). + * The data types are automatically inferred based on the Scala closure's + * signature. By default the returned UDF is deterministic. To change it to + * nondeterministic, call the API `UserDefinedFunction.asNondeterministic()`. * * @group udf_funcs * @since 1.3.0 @@ -3318,10 +3358,10 @@ object functions { } /** - * Defines a deterministic user-defined function of 3 arguments as user-defined - * function (UDF). The data types are automatically inferred based on the function's - * signature. To change a UDF to nondeterministic, call the API - * `UserDefinedFunction.asNondeterministic()`. + * Defines a Scala closure of 3 arguments as user-defined function (UDF). + * The data types are automatically inferred based on the Scala closure's + * signature. By default the returned UDF is deterministic. To change it to + * nondeterministic, call the API `UserDefinedFunction.asNondeterministic()`. * * @group udf_funcs * @since 1.3.0 @@ -3334,10 +3374,10 @@ object functions { } /** - * Defines a deterministic user-defined function of 4 arguments as user-defined - * function (UDF). The data types are automatically inferred based on the function's - * signature. To change a UDF to nondeterministic, call the API - * `UserDefinedFunction.asNondeterministic()`. + * Defines a Scala closure of 4 arguments as user-defined function (UDF). 
+ * The data types are automatically inferred based on the Scala closure's + * signature. By default the returned UDF is deterministic. To change it to + * nondeterministic, call the API `UserDefinedFunction.asNondeterministic()`. * * @group udf_funcs * @since 1.3.0 @@ -3350,10 +3390,10 @@ object functions { } /** - * Defines a deterministic user-defined function of 5 arguments as user-defined - * function (UDF). The data types are automatically inferred based on the function's - * signature. To change a UDF to nondeterministic, call the API - * `UserDefinedFunction.asNondeterministic()`. + * Defines a Scala closure of 5 arguments as user-defined function (UDF). + * The data types are automatically inferred based on the Scala closure's + * signature. By default the returned UDF is deterministic. To change it to + * nondeterministic, call the API `UserDefinedFunction.asNondeterministic()`. * * @group udf_funcs * @since 1.3.0 @@ -3366,10 +3406,10 @@ object functions { } /** - * Defines a deterministic user-defined function of 6 arguments as user-defined - * function (UDF). The data types are automatically inferred based on the function's - * signature. To change a UDF to nondeterministic, call the API - * `UserDefinedFunction.asNondeterministic()`. + * Defines a Scala closure of 6 arguments as user-defined function (UDF). + * The data types are automatically inferred based on the Scala closure's + * signature. By default the returned UDF is deterministic. To change it to + * nondeterministic, call the API `UserDefinedFunction.asNondeterministic()`. * * @group udf_funcs * @since 1.3.0 @@ -3382,10 +3422,10 @@ object functions { } /** - * Defines a deterministic user-defined function of 7 arguments as user-defined - * function (UDF). The data types are automatically inferred based on the function's - * signature. To change a UDF to nondeterministic, call the API - * `UserDefinedFunction.asNondeterministic()`. + * Defines a Scala closure of 7 arguments as user-defined function (UDF). + * The data types are automatically inferred based on the Scala closure's + * signature. By default the returned UDF is deterministic. To change it to + * nondeterministic, call the API `UserDefinedFunction.asNondeterministic()`. * * @group udf_funcs * @since 1.3.0 @@ -3398,10 +3438,10 @@ object functions { } /** - * Defines a deterministic user-defined function of 8 arguments as user-defined - * function (UDF). The data types are automatically inferred based on the function's - * signature. To change a UDF to nondeterministic, call the API - * `UserDefinedFunction.asNondeterministic()`. + * Defines a Scala closure of 8 arguments as user-defined function (UDF). + * The data types are automatically inferred based on the Scala closure's + * signature. By default the returned UDF is deterministic. To change it to + * nondeterministic, call the API `UserDefinedFunction.asNondeterministic()`. * * @group udf_funcs * @since 1.3.0 @@ -3414,10 +3454,10 @@ object functions { } /** - * Defines a deterministic user-defined function of 9 arguments as user-defined - * function (UDF). The data types are automatically inferred based on the function's - * signature. To change a UDF to nondeterministic, call the API - * `UserDefinedFunction.asNondeterministic()`. + * Defines a Scala closure of 9 arguments as user-defined function (UDF). + * The data types are automatically inferred based on the Scala closure's + * signature. By default the returned UDF is deterministic. 
To change it to + * nondeterministic, call the API `UserDefinedFunction.asNondeterministic()`. * * @group udf_funcs * @since 1.3.0 @@ -3430,10 +3470,10 @@ object functions { } /** - * Defines a deterministic user-defined function of 10 arguments as user-defined - * function (UDF). The data types are automatically inferred based on the function's - * signature. To change a UDF to nondeterministic, call the API - * `UserDefinedFunction.asNondeterministic()`. + * Defines a Scala closure of 10 arguments as user-defined function (UDF). + * The data types are automatically inferred based on the Scala closure's + * signature. By default the returned UDF is deterministic. To change it to + * nondeterministic, call the API `UserDefinedFunction.asNondeterministic()`. * * @group udf_funcs * @since 1.3.0 @@ -3445,13 +3485,172 @@ object functions { if (nullable) udf else udf.asNonNullable() } + ////////////////////////////////////////////////////////////////////////////////////////////// + // Java UDF functions + ////////////////////////////////////////////////////////////////////////////////////////////// + + /** + * Defines a Java UDF0 instance as user-defined function (UDF). + * The caller must specify the output data type, and there is no automatic input type coercion. + * By default the returned UDF is deterministic. To change it to nondeterministic, call the + * API `UserDefinedFunction.asNondeterministic()`. + * + * @group udf_funcs + * @since 2.3.0 + */ + def udf(f: UDF0[_], returnType: DataType): UserDefinedFunction = { + val func = f.asInstanceOf[UDF0[Any]].call() + UserDefinedFunction(() => func, returnType, inputTypes = None) + } + + /** + * Defines a Java UDF1 instance as user-defined function (UDF). + * The caller must specify the output data type, and there is no automatic input type coercion. + * By default the returned UDF is deterministic. To change it to nondeterministic, call the + * API `UserDefinedFunction.asNondeterministic()`. + * + * @group udf_funcs + * @since 2.3.0 + */ + def udf(f: UDF1[_, _], returnType: DataType): UserDefinedFunction = { + val func = f.asInstanceOf[UDF1[Any, Any]].call(_: Any) + UserDefinedFunction(func, returnType, inputTypes = None) + } + + /** + * Defines a Java UDF2 instance as user-defined function (UDF). + * The caller must specify the output data type, and there is no automatic input type coercion. + * By default the returned UDF is deterministic. To change it to nondeterministic, call the + * API `UserDefinedFunction.asNondeterministic()`. + * + * @group udf_funcs + * @since 2.3.0 + */ + def udf(f: UDF2[_, _, _], returnType: DataType): UserDefinedFunction = { + val func = f.asInstanceOf[UDF2[Any, Any, Any]].call(_: Any, _: Any) + UserDefinedFunction(func, returnType, inputTypes = None) + } + + /** + * Defines a Java UDF3 instance as user-defined function (UDF). + * The caller must specify the output data type, and there is no automatic input type coercion. + * By default the returned UDF is deterministic. To change it to nondeterministic, call the + * API `UserDefinedFunction.asNondeterministic()`. + * + * @group udf_funcs + * @since 2.3.0 + */ + def udf(f: UDF3[_, _, _, _], returnType: DataType): UserDefinedFunction = { + val func = f.asInstanceOf[UDF3[Any, Any, Any, Any]].call(_: Any, _: Any, _: Any) + UserDefinedFunction(func, returnType, inputTypes = None) + } + + /** + * Defines a Java UDF4 instance as user-defined function (UDF). + * The caller must specify the output data type, and there is no automatic input type coercion. 
+ * By default the returned UDF is deterministic. To change it to nondeterministic, call the + * API `UserDefinedFunction.asNondeterministic()`. + * + * @group udf_funcs + * @since 2.3.0 + */ + def udf(f: UDF4[_, _, _, _, _], returnType: DataType): UserDefinedFunction = { + val func = f.asInstanceOf[UDF4[Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any) + UserDefinedFunction(func, returnType, inputTypes = None) + } + + /** + * Defines a Java UDF5 instance as user-defined function (UDF). + * The caller must specify the output data type, and there is no automatic input type coercion. + * By default the returned UDF is deterministic. To change it to nondeterministic, call the + * API `UserDefinedFunction.asNondeterministic()`. + * + * @group udf_funcs + * @since 2.3.0 + */ + def udf(f: UDF5[_, _, _, _, _, _], returnType: DataType): UserDefinedFunction = { + val func = f.asInstanceOf[UDF5[Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any) + UserDefinedFunction(func, returnType, inputTypes = None) + } + + /** + * Defines a Java UDF6 instance as user-defined function (UDF). + * The caller must specify the output data type, and there is no automatic input type coercion. + * By default the returned UDF is deterministic. To change it to nondeterministic, call the + * API `UserDefinedFunction.asNondeterministic()`. + * + * @group udf_funcs + * @since 2.3.0 + */ + def udf(f: UDF6[_, _, _, _, _, _, _], returnType: DataType): UserDefinedFunction = { + val func = f.asInstanceOf[UDF6[Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any) + UserDefinedFunction(func, returnType, inputTypes = None) + } + + /** + * Defines a Java UDF7 instance as user-defined function (UDF). + * The caller must specify the output data type, and there is no automatic input type coercion. + * By default the returned UDF is deterministic. To change it to nondeterministic, call the + * API `UserDefinedFunction.asNondeterministic()`. + * + * @group udf_funcs + * @since 2.3.0 + */ + def udf(f: UDF7[_, _, _, _, _, _, _, _], returnType: DataType): UserDefinedFunction = { + val func = f.asInstanceOf[UDF7[Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) + UserDefinedFunction(func, returnType, inputTypes = None) + } + + /** + * Defines a Java UDF8 instance as user-defined function (UDF). + * The caller must specify the output data type, and there is no automatic input type coercion. + * By default the returned UDF is deterministic. To change it to nondeterministic, call the + * API `UserDefinedFunction.asNondeterministic()`. + * + * @group udf_funcs + * @since 2.3.0 + */ + def udf(f: UDF8[_, _, _, _, _, _, _, _, _], returnType: DataType): UserDefinedFunction = { + val func = f.asInstanceOf[UDF8[Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) + UserDefinedFunction(func, returnType, inputTypes = None) + } + + /** + * Defines a Java UDF9 instance as user-defined function (UDF). + * The caller must specify the output data type, and there is no automatic input type coercion. + * By default the returned UDF is deterministic. To change it to nondeterministic, call the + * API `UserDefinedFunction.asNondeterministic()`. 
+ * + * @group udf_funcs + * @since 2.3.0 + */ + def udf(f: UDF9[_, _, _, _, _, _, _, _, _, _], returnType: DataType): UserDefinedFunction = { + val func = f.asInstanceOf[UDF9[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) + UserDefinedFunction(func, returnType, inputTypes = None) + } + + /** + * Defines a Java UDF10 instance as user-defined function (UDF). + * The caller must specify the output data type, and there is no automatic input type coercion. + * By default the returned UDF is deterministic. To change it to nondeterministic, call the + * API `UserDefinedFunction.asNondeterministic()`. + * + * @group udf_funcs + * @since 2.3.0 + */ + def udf(f: UDF10[_, _, _, _, _, _, _, _, _, _, _], returnType: DataType): UserDefinedFunction = { + val func = f.asInstanceOf[UDF10[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) + UserDefinedFunction(func, returnType, inputTypes = None) + } + // scalastyle:on parameter.number // scalastyle:on line.size.limit /** * Defines a deterministic user-defined function (UDF) using a Scala closure. For this variant, * the caller must specify the output data type, and there is no automatic input type coercion. - * To change a UDF to nondeterministic, call the API `UserDefinedFunction.asNondeterministic()`. + * By default the returned UDF is deterministic. To change it to nondeterministic, call the + * API `UserDefinedFunction.asNondeterministic()`. * * @param f A closure in Scala * @param dataType The output data type of the UDF diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala index fdd25330c5e67..6ae307bce10c8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala @@ -480,7 +480,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { if (tableMetadata.tableType == CatalogTableType.VIEW) { // Temp or persistent views: refresh (or invalidate) any metadata/data cached // in the plan recursively. - table.queryExecution.analyzed.foreach(_.refresh()) + table.queryExecution.analyzed.refresh() } else { // Non-temp tables: refresh the metadata cache. 
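The Java `udf(UDFn, DataType)` overloads above mirror the Scala-closure variants: the closure form infers its types, while the Java form takes the return type explicitly. A minimal usage sketch, not part of the patch, assuming a live `SparkSession` named `spark` (names are illustrative):

```scala
// Sketch only: exercising the two udf styles added above.
import org.apache.spark.sql.api.java.UDF2
import org.apache.spark.sql.functions.{col, udf}
import org.apache.spark.sql.types.StringType

// Scala closure: input/output types inferred; deterministic by default.
val tag = udf((i: Int, s: String) => s"$i-$s")

// Java UDF2 via the new overload: the return type must be passed explicitly.
val tagJ = udf(new UDF2[Int, String, String] {
  override def call(i: Int, s: String): String = s"$i-$s"
}, StringType)

spark.range(3)
  .select(tag(col("id").cast("int"), col("id").cast("string")).as("scalaUdf"),
          tagJ(col("id").cast("int"), col("id").cast("string")).as("javaUdf"))
  .show()
// Either can be made nondeterministic via .asNondeterministic()
```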
sessionCatalog.refreshTable(tableIdent) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala index b9515ec7bca2a..dac463641cfab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala @@ -73,6 +73,7 @@ object HiveSerDe { val key = source.toLowerCase(Locale.ROOT) match { case s if s.startsWith("org.apache.spark.sql.parquet") => "parquet" case s if s.startsWith("org.apache.spark.sql.orc") => "orc" + case s if s.startsWith("org.apache.spark.sql.hive.orc") => "orc" case s if s.equals("orcfile") => "orc" case s if s.equals("parquetfile") => "parquet" case s if s.equals("avrofile") => "avro" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index 81359cedebea9..0ff39004fa008 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -28,11 +28,12 @@ import org.apache.hadoop.fs.FsUrlStreamHandlerFactory import org.apache.spark.{SparkConf, SparkContext, SparkException} import org.apache.spark.internal.Logging -import org.apache.spark.scheduler.LiveListenerBus -import org.apache.spark.sql.{SparkSession, SQLContext} +import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.execution.CacheManager +import org.apache.spark.sql.execution.ui.{SQLAppStatusListener, SQLAppStatusStore, SQLTab} import org.apache.spark.sql.internal.StaticSQLConf._ +import org.apache.spark.status.ElementTrackingStore import org.apache.spark.util.{MutableURLClassLoader, Utils} @@ -82,6 +83,19 @@ private[sql] class SharedState(val sparkContext: SparkContext) extends Logging { */ val cacheManager: CacheManager = new CacheManager + /** + * A status store to query SQL status/metrics of this Spark application, based on SQL-specific + * [[org.apache.spark.scheduler.SparkListenerEvent]]s. + */ + val statusStore: SQLAppStatusStore = { + val kvStore = sparkContext.statusStore.store.asInstanceOf[ElementTrackingStore] + val listener = new SQLAppStatusListener(sparkContext.conf, kvStore, live = true) + sparkContext.listenerBus.addToStatusQueue(listener) + val statusStore = new SQLAppStatusStore(kvStore, Some(listener)) + sparkContext.ui.foreach(new SQLTab(statusStore, _)) + statusStore + } + /** * A catalog that interacts with external systems. */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/AggregatedDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/AggregatedDialect.scala index f3bfea5f6bfc8..8b92c8b4f56b5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/AggregatedDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/AggregatedDialect.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.types.{DataType, MetadataBuilder} /** * AggregatedDialect can unify multiple dialects into one virtual Dialect. * Dialects are tried in order, and the first dialect that does not return a - * neutral element will will. + * neutral element will win. * * @param dialects List of dialects. 
*/ @@ -63,4 +63,8 @@ private class AggregatedDialect(dialects: List[JdbcDialect]) extends JdbcDialect case _ => Some(false) } } + + override def getTruncateQuery(table: String): String = { + dialects.head.getTruncateQuery(table) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index 7c38ed68c0413..83d87a11810c1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -116,6 +116,18 @@ abstract class JdbcDialect extends Serializable { s"SELECT * FROM $table WHERE 1=0" } + /** + * The SQL query that should be used to truncate a table. Dialects can override this method to + * return a query that is suitable for a particular database. For PostgreSQL, for instance, + * a different query is used to prevent "TRUNCATE" affecting other tables. + * @param table The name of the table. + * @return The SQL query to use for truncating a table + */ + @Since("2.3.0") + def getTruncateQuery(table: String): String = { + s"TRUNCATE TABLE $table" + } + /** * Override connection specific properties to run before a select is made. This is in place to * allow dialects that need special treatment to optimize behavior. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala index e3f106c41c7ff..6ef77f24460be 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala @@ -18,7 +18,10 @@ package org.apache.spark.sql.jdbc import java.sql.{Date, Timestamp, Types} +import java.util.TimeZone +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -29,6 +32,13 @@ private case object OracleDialect extends JdbcDialect { override def canHandle(url: String): Boolean = url.startsWith("jdbc:oracle") + private def supportTimeZoneTypes: Boolean = { + val timeZone = DateTimeUtils.getTimeZone(SQLConf.get.sessionLocalTimeZone) + // TODO: support timezone types when users are not using the JVM timezone, which + // is the default value of SESSION_LOCAL_TIMEZONE + timeZone == TimeZone.getDefault + } + override def getCatalystType( sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { sqlType match { @@ -49,7 +59,8 @@ private case object OracleDialect extends JdbcDialect { case _ if scale == -127L => Option(DecimalType(DecimalType.MAX_PRECISION, 10)) case _ => None } - case TIMESTAMPTZ => Some(TimestampType) // Value for Timestamp with Time Zone in Oracle + case TIMESTAMPTZ if supportTimeZoneTypes + => Some(TimestampType) // Value for Timestamp with Time Zone in Oracle case BINARY_FLOAT => Some(FloatType) // Value for OracleTypes.BINARY_FLOAT case BINARY_DOUBLE => Some(DoubleType) // Value for OracleTypes.BINARY_DOUBLE case _ => None diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala index 4f61a328f47ca..13a2035f4d0c4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala @@ -85,6 +85,17 @@ private object PostgresDialect extends JdbcDialect { s"SELECT 1 FROM $table LIMIT 1" } + /** + * The SQL query used 
to truncate a table. For Postgres, the default behaviour is to + * also truncate any descendant tables. As this is a (possibly unwanted) side-effect, + * the Postgres dialect adds 'ONLY' to truncate only the table in question + * @param table The name of the table. + * @return The SQL query to use for truncating a table + */ + override def getTruncateQuery(table: String): String = { + s"TRUNCATE TABLE ONLY $table" + } + override def beforeFetch(connection: Connection, properties: Map[String, String]): Unit = { super.beforeFetch(connection, properties) @@ -97,8 +108,7 @@ private object PostgresDialect extends JdbcDialect { if (properties.getOrElse(JDBCOptions.JDBC_BATCH_FETCH_SIZE, "0").toInt > 0) { connection.setAutoCommit(false) } - } - override def isCascadingTruncateTable(): Option[Boolean] = Some(true) + override def isCascadingTruncateTable(): Option[Boolean] = Some(false) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala index a42e28053a96a..52f2e2639cd86 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.streaming -import java.util.Locale +import java.util.{Locale, Optional} import scala.collection.JavaConverters._ @@ -26,8 +26,12 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, SparkSession} import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.DataSource -import org.apache.spark.sql.execution.streaming.StreamingRelation +import org.apache.spark.sql.execution.streaming.{StreamingRelation, StreamingRelationV2} +import org.apache.spark.sql.sources.StreamSourceProvider +import org.apache.spark.sql.sources.v2.DataSourceV2Options +import org.apache.spark.sql.sources.v2.streaming.{ContinuousReadSupport, MicroBatchReadSupport} import org.apache.spark.sql.types.StructType +import org.apache.spark.util.Utils /** * Interface used to load a streaming `Dataset` from external storage systems (e.g. file systems, @@ -153,13 +157,45 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo "read files of Hive data source directly.") } - val dataSource = - DataSource( - sparkSession, - userSpecifiedSchema = userSpecifiedSchema, - className = source, - options = extraOptions.toMap) - Dataset.ofRows(sparkSession, StreamingRelation(dataSource)) + val ds = DataSource.lookupDataSource(source, sparkSession.sqlContext.conf).newInstance() + val options = new DataSourceV2Options(extraOptions.asJava) + // We need to generate the V1 data source so we can pass it to the V2 relation as a shim. + // We can't be sure at this point whether we'll actually want to use V2, since we don't know the + // writer or whether the query is continuous. 
+ val v1DataSource = DataSource( + sparkSession, + userSpecifiedSchema = userSpecifiedSchema, + className = source, + options = extraOptions.toMap) + val v1Relation = ds match { + case _: StreamSourceProvider => Some(StreamingRelation(v1DataSource)) + case _ => None + } + ds match { + case s: MicroBatchReadSupport => + val tempReader = s.createMicroBatchReader( + Optional.ofNullable(userSpecifiedSchema.orNull), + Utils.createTempDir(namePrefix = s"temporaryReader").getCanonicalPath, + options) + Dataset.ofRows( + sparkSession, + StreamingRelationV2( + s, source, extraOptions.toMap, + tempReader.readSchema().toAttributes, v1Relation)(sparkSession)) + case s: ContinuousReadSupport => + val tempReader = s.createContinuousReader( + Optional.ofNullable(userSpecifiedSchema.orNull), + Utils.createTempDir(namePrefix = s"temporaryReader").getCanonicalPath, + options) + Dataset.ofRows( + sparkSession, + StreamingRelationV2( + s, source, extraOptions.toMap, + tempReader.readSchema().toAttributes, v1Relation)(sparkSession)) + case _ => + // Code path for data source v1. + Dataset.ofRows(sparkSession, StreamingRelation(v1DataSource)) + } } /** @@ -239,17 +275,20 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo *
   * <ul>
   * <li>`maxFilesPerTrigger` (default: no max limit): sets the maximum number of new files to be
   * considered in every trigger.</li>
-  * <li>`sep` (default `,`): sets the single character as a separator for each
+  * <li>`sep` (default `,`): sets a single character as a separator for each
   * field and value.</li>
   * <li>`encoding` (default `UTF-8`): decodes the CSV files by the given encoding
   * type.</li>
-  * <li>`quote` (default `"`): sets the single character used for escaping quoted values where
+  * <li>`quote` (default `"`): sets a single character used for escaping quoted values where
   * the separator can be part of the value. If you would like to turn off quotations, you need to
   * set not `null` but an empty string. This behaviour is different from
   * `com.databricks.spark.csv`.</li>
-  * <li>`escape` (default `\`): sets the single character used for escaping quotes inside
+  * <li>`escape` (default `\`): sets a single character used for escaping quotes inside
   * an already quoted value.</li>
-  * <li>`comment` (default empty string): sets the single character used for skipping lines
+  * <li>`charToEscapeQuoteEscaping` (default `escape` or `\0`): sets a single character used for
+  * escaping the escape for the quote character. The default value is escape character when escape
+  * and quote characters are different, `\0` otherwise.</li>
+  * <li>`comment` (default empty string): sets a single character used for skipping lines
   * beginning with this character. By default, it is disabled.</li>
   * <li>`header` (default `false`): uses the first line as names of columns.</li>
   * <li>`inferSchema` (default `false`): infers the input schema automatically from data. It
@@ -298,6 +337,21 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
    */
   def csv(path: String): DataFrame = format("csv").load(path)

+  /**
+   * Loads an ORC file stream, returning the result as a `DataFrame`.
+   *
+   * You can set the following ORC-specific option(s) for reading ORC files:
+   * <ul>
+   * <li>`maxFilesPerTrigger` (default: no max limit): sets the maximum number of new files to be
+   * considered in every trigger.</li>
+   * </ul>
          + * + * @since 2.3.0 + */ + def orc(path: String): DataFrame = { + format("orc").load(path) + } + /** * Loads a Parquet file stream, returning the result as a `DataFrame`. * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 0be69b98abc8a..db588ae282f38 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -26,7 +26,9 @@ import org.apache.spark.sql.{AnalysisException, Dataset, ForeachWriter} import org.apache.spark.sql.catalyst.streaming.InternalOutputModes import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.DataSource -import org.apache.spark.sql.execution.streaming.{ForeachSink, MemoryPlan, MemorySink} +import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.execution.streaming.continuous.ContinuousTrigger +import org.apache.spark.sql.execution.streaming.sources.{MemoryPlanV2, MemorySinkV2} /** * Interface used to write a streaming `Dataset` to external storage systems (e.g. file systems, @@ -240,14 +242,23 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { if (extraOptions.get("queryName").isEmpty) { throw new AnalysisException("queryName must be specified for memory sink") } - val sink = new MemorySink(df.schema, outputMode) - val resultDf = Dataset.ofRows(df.sparkSession, new MemoryPlan(sink)) + val (sink, resultDf) = trigger match { + case _: ContinuousTrigger => + val s = new MemorySinkV2() + val r = Dataset.ofRows(df.sparkSession, new MemoryPlanV2(s, df.schema.toAttributes)) + (s, r) + case _ => + val s = new MemorySink(df.schema, outputMode) + val r = Dataset.ofRows(df.sparkSession, new MemoryPlan(s)) + (s, r) + } val chkpointLoc = extraOptions.get("checkpointLocation") val recoverFromChkpoint = outputMode == OutputMode.Complete() val query = df.sparkSession.sessionState.streamingQueryManager.startQuery( extraOptions.get("queryName"), chkpointLoc, df, + extraOptions.toMap, sink, outputMode, useTempCheckpointLocation = true, @@ -262,6 +273,7 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { extraOptions.get("queryName"), extraOptions.get("checkpointLocation"), df, + extraOptions.toMap, sink, outputMode, useTempCheckpointLocation = true, @@ -277,6 +289,7 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { extraOptions.get("queryName"), extraOptions.get("checkpointLocation"), df, + extraOptions.toMap, dataSource.createSink(outputMode), outputMode, useTempCheckpointLocation = source == "console", diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala index 48b0ea20e5da1..4b27e0d4ef47b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala @@ -29,8 +29,10 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, DataFrame, SparkSession} import org.apache.spark.sql.catalyst.analysis.UnsupportedOperationChecker import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.execution.streaming.continuous.{ContinuousExecution, ContinuousTrigger} import 
org.apache.spark.sql.execution.streaming.state.StateStoreCoordinatorRef import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.sources.v2.streaming.{ContinuousWriteSupport, MicroBatchWriteSupport} import org.apache.spark.util.{Clock, SystemClock, Utils} /** @@ -188,7 +190,8 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo userSpecifiedName: Option[String], userSpecifiedCheckpointLocation: Option[String], df: DataFrame, - sink: Sink, + extraOptions: Map[String, String], + sink: BaseStreamingSink, outputMode: OutputMode, useTempCheckpointLocation: Boolean, recoverFromCheckpointLocation: Boolean, @@ -237,16 +240,36 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo "is not supported in streaming DataFrames/Datasets and will be disabled.") } - new StreamingQueryWrapper(new StreamExecution( - sparkSession, - userSpecifiedName.orNull, - checkpointLocation, - analyzedPlan, - sink, - trigger, - triggerClock, - outputMode, - deleteCheckpointOnStop)) + (sink, trigger) match { + case (v2Sink: ContinuousWriteSupport, trigger: ContinuousTrigger) => + UnsupportedOperationChecker.checkForContinuous(analyzedPlan, outputMode) + new StreamingQueryWrapper(new ContinuousExecution( + sparkSession, + userSpecifiedName.orNull, + checkpointLocation, + analyzedPlan, + v2Sink, + trigger, + triggerClock, + outputMode, + extraOptions, + deleteCheckpointOnStop)) + case (_: MicroBatchWriteSupport, _) | (_: Sink, _) => + new StreamingQueryWrapper(new MicroBatchExecution( + sparkSession, + userSpecifiedName.orNull, + checkpointLocation, + analyzedPlan, + sink, + trigger, + triggerClock, + outputMode, + extraOptions, + deleteCheckpointOnStop)) + case (_: ContinuousWriteSupport, t) if !t.isInstanceOf[ContinuousTrigger] => + throw new AnalysisException( + "Sink only supports continuous writes, but a continuous trigger was not specified.") + } } /** @@ -269,7 +292,8 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo userSpecifiedName: Option[String], userSpecifiedCheckpointLocation: Option[String], df: DataFrame, - sink: Sink, + extraOptions: Map[String, String], + sink: BaseStreamingSink, outputMode: OutputMode, useTempCheckpointLocation: Boolean = false, recoverFromCheckpointLocation: Boolean = true, @@ -279,6 +303,7 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo userSpecifiedName, userSpecifiedCheckpointLocation, df, + extraOptions, sink, outputMode, useTempCheckpointLocation, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala index cedc1dce4a703..0dcb666e2c3e4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala @@ -152,7 +152,7 @@ class StreamingQueryProgress private[sql]( * @param endOffset The ending offset for data being read. * @param numInputRows The number of records read from this source. * @param inputRowsPerSecond The rate at which data is arriving from this source. - * @param processedRowsPerSecond The rate at which data from this source is being procressed by + * @param processedRowsPerSecond The rate at which data from this source is being processed by * Spark. 
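Stepping back to the `StreamingQueryManager` dispatch above: a continuous-capable sink paired with a `ContinuousTrigger` starts a `ContinuousExecution`, while any other combination starts a `MicroBatchExecution`. A sketch of how each branch is reached from the public API; this is not part of the patch, and it assumes a live `SparkSession` named `spark` plus a source/sink pair (`rate`/`console`) that support continuous processing:

```scala
// Sketch only: exercising the (sink, trigger) dispatch shown above.
import org.apache.spark.sql.streaming.Trigger

val query = spark.readStream
  .format("rate")
  .load()
  .writeStream
  .format("console")
  .trigger(Trigger.Continuous("1 second"))  // ContinuousTrigger -> ContinuousExecution
  .start()

// Any other trigger, e.g. Trigger.ProcessingTime("5 seconds"), takes the
// MicroBatchExecution branch instead.
```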
* @since 2.1.0 */ diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java index b007093dad84b..69a2904f5f3fe 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java @@ -36,6 +36,7 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.expressions.UserDefinedFunction; import org.apache.spark.sql.test.TestSparkSession; import org.apache.spark.sql.types.*; import org.apache.spark.util.sketch.BloomFilter; @@ -455,4 +456,15 @@ public void testCircularReferenceBean() { CircularReference1Bean bean = new CircularReference1Bean(); spark.createDataFrame(Arrays.asList(bean), CircularReference1Bean.class); } + + @Test + public void testUDF() { + UserDefinedFunction foo = udf((Integer i, String s) -> i.toString() + s, DataTypes.StringType); + Dataset df = spark.table("testData").select(foo.apply(col("key"), col("value"))); + String[] result = df.collectAsList().stream().map(row -> row.getString(0)) + .toArray(String[]::new); + String[] expected = spark.table("testData").collectAsList().stream() + .map(row -> row.get(0).toString() + row.getString(1)).toArray(String[]::new); + Assert.assertArrayEquals(expected, result); + } } diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/MyDoubleAvg.java b/sql/core/src/test/java/test/org/apache/spark/sql/MyDoubleAvg.java index 447a71d284fbb..288f5e7426c05 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/MyDoubleAvg.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/MyDoubleAvg.java @@ -47,7 +47,7 @@ public MyDoubleAvg() { _inputDataType = DataTypes.createStructType(inputFields); // The buffer has two values, bufferSum for storing the current sum and - // bufferCount for storing the number of non-null input values that have been contribuetd + // bufferCount for storing the number of non-null input values that have been contributed // to the current sum. List bufferFields = new ArrayList<>(); bufferFields.add(DataTypes.createStructField("bufferSum", DataTypes.DoubleType, true)); diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index 1e1384549a410..c5070b734d521 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -60,3 +60,12 @@ SELECT a, COUNT(1) FROM testData WHERE false GROUP BY a; -- Aggregate with empty input and empty GroupBy expressions. 
SELECT COUNT(1) FROM testData WHERE false; SELECT 1 FROM (SELECT COUNT(1) FROM testData WHERE false) t; + +-- Aggregate with empty GroupBy expressions and filter on top +SELECT 1 from ( + SELECT 1 AS z, + MIN(a.x) + FROM (select 1 as x) a + WHERE false +) b +where b.z != b.z diff --git a/sql/core/src/test/resources/sql-tests/inputs/join-empty-relation.sql b/sql/core/src/test/resources/sql-tests/inputs/join-empty-relation.sql new file mode 100644 index 0000000000000..8afa3270f4de4 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/join-empty-relation.sql @@ -0,0 +1,28 @@ +CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1) AS GROUPING(a); +CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1) AS GROUPING(a); + +CREATE TEMPORARY VIEW empty_table as SELECT a FROM t2 WHERE false; + +SELECT * FROM t1 INNER JOIN empty_table; +SELECT * FROM t1 CROSS JOIN empty_table; +SELECT * FROM t1 LEFT OUTER JOIN empty_table; +SELECT * FROM t1 RIGHT OUTER JOIN empty_table; +SELECT * FROM t1 FULL OUTER JOIN empty_table; +SELECT * FROM t1 LEFT SEMI JOIN empty_table; +SELECT * FROM t1 LEFT ANTI JOIN empty_table; + +SELECT * FROM empty_table INNER JOIN t1; +SELECT * FROM empty_table CROSS JOIN t1; +SELECT * FROM empty_table LEFT OUTER JOIN t1; +SELECT * FROM empty_table RIGHT OUTER JOIN t1; +SELECT * FROM empty_table FULL OUTER JOIN t1; +SELECT * FROM empty_table LEFT SEMI JOIN t1; +SELECT * FROM empty_table LEFT ANTI JOIN t1; + +SELECT * FROM empty_table INNER JOIN empty_table; +SELECT * FROM empty_table CROSS JOIN empty_table; +SELECT * FROM empty_table LEFT OUTER JOIN empty_table; +SELECT * FROM empty_table RIGHT OUTER JOIN empty_table; +SELECT * FROM empty_table FULL OUTER JOIN empty_table; +SELECT * FROM empty_table LEFT SEMI JOIN empty_table; +SELECT * FROM empty_table LEFT ANTI JOIN empty_table; diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index 40d0c064f5c44..4113734e1707e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -24,3 +24,26 @@ select left("abcd", 2), left("abcd", 5), left("abcd", '2'), left("abcd", null); select left(null, -2), left("abcd", -2), left("abcd", 0), left("abcd", 'a'); select right("abcd", 2), right("abcd", 5), right("abcd", '2'), right("abcd", null); select right(null, -2), right("abcd", -2), right("abcd", 0), right("abcd", 'a'); + +-- turn off concatBinaryAsString +set spark.sql.function.concatBinaryAsString=false; + +-- Check if catalyst combine nested `Concat`s if concatBinaryAsString=false +EXPLAIN SELECT ((col1 || col2) || (col3 || col4)) col +FROM ( + SELECT + string(id) col1, + string(id + 1) col2, + encode(string(id + 2), 'utf-8') col3, + encode(string(id + 3), 'utf-8') col4 + FROM range(10) +); + +EXPLAIN SELECT (col1 || (col3 || col4)) col +FROM ( + SELECT + string(id) col1, + encode(string(id + 2), 'utf-8') col3, + encode(string(id + 3), 'utf-8') col4 + FROM range(10) +) diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/binaryComparison.sql b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/binaryComparison.sql new file mode 100644 index 0000000000000..522322ac480be --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/binaryComparison.sql @@ -0,0 +1,287 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. 
See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +-- Binary Comparison + +CREATE TEMPORARY VIEW t AS SELECT 1; + +SELECT cast(1 as binary) = '1' FROM t; +SELECT cast(1 as binary) > '2' FROM t; +SELECT cast(1 as binary) >= '2' FROM t; +SELECT cast(1 as binary) < '2' FROM t; +SELECT cast(1 as binary) <= '2' FROM t; +SELECT cast(1 as binary) <> '2' FROM t; +SELECT cast(1 as binary) = cast(null as string) FROM t; +SELECT cast(1 as binary) > cast(null as string) FROM t; +SELECT cast(1 as binary) >= cast(null as string) FROM t; +SELECT cast(1 as binary) < cast(null as string) FROM t; +SELECT cast(1 as binary) <= cast(null as string) FROM t; +SELECT cast(1 as binary) <> cast(null as string) FROM t; +SELECT '1' = cast(1 as binary) FROM t; +SELECT '2' > cast(1 as binary) FROM t; +SELECT '2' >= cast(1 as binary) FROM t; +SELECT '2' < cast(1 as binary) FROM t; +SELECT '2' <= cast(1 as binary) FROM t; +SELECT '2' <> cast(1 as binary) FROM t; +SELECT cast(null as string) = cast(1 as binary) FROM t; +SELECT cast(null as string) > cast(1 as binary) FROM t; +SELECT cast(null as string) >= cast(1 as binary) FROM t; +SELECT cast(null as string) < cast(1 as binary) FROM t; +SELECT cast(null as string) <= cast(1 as binary) FROM t; +SELECT cast(null as string) <> cast(1 as binary) FROM t; +SELECT cast(1 as tinyint) = '1' FROM t; +SELECT cast(1 as tinyint) > '2' FROM t; +SELECT cast(1 as tinyint) >= '2' FROM t; +SELECT cast(1 as tinyint) < '2' FROM t; +SELECT cast(1 as tinyint) <= '2' FROM t; +SELECT cast(1 as tinyint) <> '2' FROM t; +SELECT cast(1 as tinyint) = cast(null as string) FROM t; +SELECT cast(1 as tinyint) > cast(null as string) FROM t; +SELECT cast(1 as tinyint) >= cast(null as string) FROM t; +SELECT cast(1 as tinyint) < cast(null as string) FROM t; +SELECT cast(1 as tinyint) <= cast(null as string) FROM t; +SELECT cast(1 as tinyint) <> cast(null as string) FROM t; +SELECT '1' = cast(1 as tinyint) FROM t; +SELECT '2' > cast(1 as tinyint) FROM t; +SELECT '2' >= cast(1 as tinyint) FROM t; +SELECT '2' < cast(1 as tinyint) FROM t; +SELECT '2' <= cast(1 as tinyint) FROM t; +SELECT '2' <> cast(1 as tinyint) FROM t; +SELECT cast(null as string) = cast(1 as tinyint) FROM t; +SELECT cast(null as string) > cast(1 as tinyint) FROM t; +SELECT cast(null as string) >= cast(1 as tinyint) FROM t; +SELECT cast(null as string) < cast(1 as tinyint) FROM t; +SELECT cast(null as string) <= cast(1 as tinyint) FROM t; +SELECT cast(null as string) <> cast(1 as tinyint) FROM t; +SELECT cast(1 as smallint) = '1' FROM t; +SELECT cast(1 as smallint) > '2' FROM t; +SELECT cast(1 as smallint) >= '2' FROM t; +SELECT cast(1 as smallint) < '2' FROM t; +SELECT cast(1 as smallint) <= '2' FROM t; +SELECT cast(1 as smallint) <> '2' FROM t; +SELECT cast(1 as smallint) = cast(null as string) FROM t; +SELECT cast(1 as smallint) > cast(null as string) FROM t; +SELECT cast(1 as smallint) >= 
cast(null as string) FROM t; +SELECT cast(1 as smallint) < cast(null as string) FROM t; +SELECT cast(1 as smallint) <= cast(null as string) FROM t; +SELECT cast(1 as smallint) <> cast(null as string) FROM t; +SELECT '1' = cast(1 as smallint) FROM t; +SELECT '2' > cast(1 as smallint) FROM t; +SELECT '2' >= cast(1 as smallint) FROM t; +SELECT '2' < cast(1 as smallint) FROM t; +SELECT '2' <= cast(1 as smallint) FROM t; +SELECT '2' <> cast(1 as smallint) FROM t; +SELECT cast(null as string) = cast(1 as smallint) FROM t; +SELECT cast(null as string) > cast(1 as smallint) FROM t; +SELECT cast(null as string) >= cast(1 as smallint) FROM t; +SELECT cast(null as string) < cast(1 as smallint) FROM t; +SELECT cast(null as string) <= cast(1 as smallint) FROM t; +SELECT cast(null as string) <> cast(1 as smallint) FROM t; +SELECT cast(1 as int) = '1' FROM t; +SELECT cast(1 as int) > '2' FROM t; +SELECT cast(1 as int) >= '2' FROM t; +SELECT cast(1 as int) < '2' FROM t; +SELECT cast(1 as int) <= '2' FROM t; +SELECT cast(1 as int) <> '2' FROM t; +SELECT cast(1 as int) = cast(null as string) FROM t; +SELECT cast(1 as int) > cast(null as string) FROM t; +SELECT cast(1 as int) >= cast(null as string) FROM t; +SELECT cast(1 as int) < cast(null as string) FROM t; +SELECT cast(1 as int) <= cast(null as string) FROM t; +SELECT cast(1 as int) <> cast(null as string) FROM t; +SELECT '1' = cast(1 as int) FROM t; +SELECT '2' > cast(1 as int) FROM t; +SELECT '2' >= cast(1 as int) FROM t; +SELECT '2' < cast(1 as int) FROM t; +SELECT '2' <> cast(1 as int) FROM t; +SELECT '2' <= cast(1 as int) FROM t; +SELECT cast(null as string) = cast(1 as int) FROM t; +SELECT cast(null as string) > cast(1 as int) FROM t; +SELECT cast(null as string) >= cast(1 as int) FROM t; +SELECT cast(null as string) < cast(1 as int) FROM t; +SELECT cast(null as string) <> cast(1 as int) FROM t; +SELECT cast(null as string) <= cast(1 as int) FROM t; +SELECT cast(1 as bigint) = '1' FROM t; +SELECT cast(1 as bigint) > '2' FROM t; +SELECT cast(1 as bigint) >= '2' FROM t; +SELECT cast(1 as bigint) < '2' FROM t; +SELECT cast(1 as bigint) <= '2' FROM t; +SELECT cast(1 as bigint) <> '2' FROM t; +SELECT cast(1 as bigint) = cast(null as string) FROM t; +SELECT cast(1 as bigint) > cast(null as string) FROM t; +SELECT cast(1 as bigint) >= cast(null as string) FROM t; +SELECT cast(1 as bigint) < cast(null as string) FROM t; +SELECT cast(1 as bigint) <= cast(null as string) FROM t; +SELECT cast(1 as bigint) <> cast(null as string) FROM t; +SELECT '1' = cast(1 as bigint) FROM t; +SELECT '2' > cast(1 as bigint) FROM t; +SELECT '2' >= cast(1 as bigint) FROM t; +SELECT '2' < cast(1 as bigint) FROM t; +SELECT '2' <= cast(1 as bigint) FROM t; +SELECT '2' <> cast(1 as bigint) FROM t; +SELECT cast(null as string) = cast(1 as bigint) FROM t; +SELECT cast(null as string) > cast(1 as bigint) FROM t; +SELECT cast(null as string) >= cast(1 as bigint) FROM t; +SELECT cast(null as string) < cast(1 as bigint) FROM t; +SELECT cast(null as string) <= cast(1 as bigint) FROM t; +SELECT cast(null as string) <> cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(10, 0)) = '1' FROM t; +SELECT cast(1 as decimal(10, 0)) > '2' FROM t; +SELECT cast(1 as decimal(10, 0)) >= '2' FROM t; +SELECT cast(1 as decimal(10, 0)) < '2' FROM t; +SELECT cast(1 as decimal(10, 0)) <> '2' FROM t; +SELECT cast(1 as decimal(10, 0)) <= '2' FROM t; +SELECT cast(1 as decimal(10, 0)) = cast(null as string) FROM t; +SELECT cast(1 as decimal(10, 0)) > cast(null as string) FROM t; +SELECT cast(1 as decimal(10, 0)) 
>= cast(null as string) FROM t; +SELECT cast(1 as decimal(10, 0)) < cast(null as string) FROM t; +SELECT cast(1 as decimal(10, 0)) <> cast(null as string) FROM t; +SELECT cast(1 as decimal(10, 0)) <= cast(null as string) FROM t; +SELECT '1' = cast(1 as decimal(10, 0)) FROM t; +SELECT '2' > cast(1 as decimal(10, 0)) FROM t; +SELECT '2' >= cast(1 as decimal(10, 0)) FROM t; +SELECT '2' < cast(1 as decimal(10, 0)) FROM t; +SELECT '2' <= cast(1 as decimal(10, 0)) FROM t; +SELECT '2' <> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(null as string) = cast(1 as decimal(10, 0)) FROM t; +SELECT cast(null as string) > cast(1 as decimal(10, 0)) FROM t; +SELECT cast(null as string) >= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(null as string) < cast(1 as decimal(10, 0)) FROM t; +SELECT cast(null as string) <= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(null as string) <> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as double) = '1' FROM t; +SELECT cast(1 as double) > '2' FROM t; +SELECT cast(1 as double) >= '2' FROM t; +SELECT cast(1 as double) < '2' FROM t; +SELECT cast(1 as double) <= '2' FROM t; +SELECT cast(1 as double) <> '2' FROM t; +SELECT cast(1 as double) = cast(null as string) FROM t; +SELECT cast(1 as double) > cast(null as string) FROM t; +SELECT cast(1 as double) >= cast(null as string) FROM t; +SELECT cast(1 as double) < cast(null as string) FROM t; +SELECT cast(1 as double) <= cast(null as string) FROM t; +SELECT cast(1 as double) <> cast(null as string) FROM t; +SELECT '1' = cast(1 as double) FROM t; +SELECT '2' > cast(1 as double) FROM t; +SELECT '2' >= cast(1 as double) FROM t; +SELECT '2' < cast(1 as double) FROM t; +SELECT '2' <= cast(1 as double) FROM t; +SELECT '2' <> cast(1 as double) FROM t; +SELECT cast(null as string) = cast(1 as double) FROM t; +SELECT cast(null as string) > cast(1 as double) FROM t; +SELECT cast(null as string) >= cast(1 as double) FROM t; +SELECT cast(null as string) < cast(1 as double) FROM t; +SELECT cast(null as string) <= cast(1 as double) FROM t; +SELECT cast(null as string) <> cast(1 as double) FROM t; +SELECT cast(1 as float) = '1' FROM t; +SELECT cast(1 as float) > '2' FROM t; +SELECT cast(1 as float) >= '2' FROM t; +SELECT cast(1 as float) < '2' FROM t; +SELECT cast(1 as float) <= '2' FROM t; +SELECT cast(1 as float) <> '2' FROM t; +SELECT cast(1 as float) = cast(null as string) FROM t; +SELECT cast(1 as float) > cast(null as string) FROM t; +SELECT cast(1 as float) >= cast(null as string) FROM t; +SELECT cast(1 as float) < cast(null as string) FROM t; +SELECT cast(1 as float) <= cast(null as string) FROM t; +SELECT cast(1 as float) <> cast(null as string) FROM t; +SELECT '1' = cast(1 as float) FROM t; +SELECT '2' > cast(1 as float) FROM t; +SELECT '2' >= cast(1 as float) FROM t; +SELECT '2' < cast(1 as float) FROM t; +SELECT '2' <= cast(1 as float) FROM t; +SELECT '2' <> cast(1 as float) FROM t; +SELECT cast(null as string) = cast(1 as float) FROM t; +SELECT cast(null as string) > cast(1 as float) FROM t; +SELECT cast(null as string) >= cast(1 as float) FROM t; +SELECT cast(null as string) < cast(1 as float) FROM t; +SELECT cast(null as string) <= cast(1 as float) FROM t; +SELECT cast(null as string) <> cast(1 as float) FROM t; +-- the following queries return 1 if the search condition is satisfied +-- and returns nothing if the search condition is not satisfied +SELECT '1996-09-09' = date('1996-09-09') FROM t; +SELECT '1996-9-10' > date('1996-09-09') FROM t; +SELECT '1996-9-10' >= date('1996-09-09') FROM t; +SELECT '1996-9-10' < 
date('1996-09-09') FROM t; +SELECT '1996-9-10' <= date('1996-09-09') FROM t; +SELECT '1996-9-10' <> date('1996-09-09') FROM t; +SELECT cast(null as string) = date('1996-09-09') FROM t; +SELECT cast(null as string)> date('1996-09-09') FROM t; +SELECT cast(null as string)>= date('1996-09-09') FROM t; +SELECT cast(null as string)< date('1996-09-09') FROM t; +SELECT cast(null as string)<= date('1996-09-09') FROM t; +SELECT cast(null as string)<> date('1996-09-09') FROM t; +SELECT date('1996-09-09') = '1996-09-09' FROM t; +SELECT date('1996-9-10') > '1996-09-09' FROM t; +SELECT date('1996-9-10') >= '1996-09-09' FROM t; +SELECT date('1996-9-10') < '1996-09-09' FROM t; +SELECT date('1996-9-10') <= '1996-09-09' FROM t; +SELECT date('1996-9-10') <> '1996-09-09' FROM t; +SELECT date('1996-09-09') = cast(null as string) FROM t; +SELECT date('1996-9-10') > cast(null as string) FROM t; +SELECT date('1996-9-10') >= cast(null as string) FROM t; +SELECT date('1996-9-10') < cast(null as string) FROM t; +SELECT date('1996-9-10') <= cast(null as string) FROM t; +SELECT date('1996-9-10') <> cast(null as string) FROM t; +SELECT '1996-09-09 12:12:12.4' = timestamp('1996-09-09 12:12:12.4') FROM t; +SELECT '1996-09-09 12:12:12.5' > timestamp('1996-09-09 12:12:12.4') FROM t; +SELECT '1996-09-09 12:12:12.5' >= timestamp('1996-09-09 12:12:12.4') FROM t; +SELECT '1996-09-09 12:12:12.5' < timestamp('1996-09-09 12:12:12.4') FROM t; +SELECT '1996-09-09 12:12:12.5' <= timestamp('1996-09-09 12:12:12.4') FROM t; +SELECT '1996-09-09 12:12:12.5' <> timestamp('1996-09-09 12:12:12.4') FROM t; +SELECT cast(null as string) = timestamp('1996-09-09 12:12:12.4') FROM t; +SELECT cast(null as string) > timestamp('1996-09-09 12:12:12.4') FROM t; +SELECT cast(null as string) >= timestamp('1996-09-09 12:12:12.4') FROM t; +SELECT cast(null as string) < timestamp('1996-09-09 12:12:12.4') FROM t; +SELECT cast(null as string) <= timestamp('1996-09-09 12:12:12.4') FROM t; +SELECT cast(null as string) <> timestamp('1996-09-09 12:12:12.4') FROM t; +SELECT timestamp('1996-09-09 12:12:12.4' )= '1996-09-09 12:12:12.4' FROM t; +SELECT timestamp('1996-09-09 12:12:12.5' )> '1996-09-09 12:12:12.4' FROM t; +SELECT timestamp('1996-09-09 12:12:12.5' )>= '1996-09-09 12:12:12.4' FROM t; +SELECT timestamp('1996-09-09 12:12:12.5' )< '1996-09-09 12:12:12.4' FROM t; +SELECT timestamp('1996-09-09 12:12:12.5' )<= '1996-09-09 12:12:12.4' FROM t; +SELECT timestamp('1996-09-09 12:12:12.5' )<> '1996-09-09 12:12:12.4' FROM t; +SELECT timestamp('1996-09-09 12:12:12.4' )= cast(null as string) FROM t; +SELECT timestamp('1996-09-09 12:12:12.5' )> cast(null as string) FROM t; +SELECT timestamp('1996-09-09 12:12:12.5' )>= cast(null as string) FROM t; +SELECT timestamp('1996-09-09 12:12:12.5' )< cast(null as string) FROM t; +SELECT timestamp('1996-09-09 12:12:12.5' )<= cast(null as string) FROM t; +SELECT timestamp('1996-09-09 12:12:12.5' )<> cast(null as string) FROM t; +SELECT ' ' = X'0020' FROM t; +SELECT ' ' > X'001F' FROM t; +SELECT ' ' >= X'001F' FROM t; +SELECT ' ' < X'001F' FROM t; +SELECT ' ' <= X'001F' FROM t; +SELECT ' ' <> X'001F' FROM t; +SELECT cast(null as string) = X'0020' FROM t; +SELECT cast(null as string) > X'001F' FROM t; +SELECT cast(null as string) >= X'001F' FROM t; +SELECT cast(null as string) < X'001F' FROM t; +SELECT cast(null as string) <= X'001F' FROM t; +SELECT cast(null as string) <> X'001F' FROM t; +SELECT X'0020' = ' ' FROM t; +SELECT X'001F' > ' ' FROM t; +SELECT X'001F' >= ' ' FROM t; +SELECT X'001F' < ' ' FROM t; +SELECT X'001F' <= ' ' 
FROM t; +SELECT X'001F' <> ' ' FROM t; +SELECT X'0020' = cast(null as string) FROM t; +SELECT X'001F' > cast(null as string) FROM t; +SELECT X'001F' >= cast(null as string) FROM t; +SELECT X'001F' < cast(null as string) FROM t; +SELECT X'001F' <= cast(null as string) FROM t; +SELECT X'001F' <> cast(null as string) FROM t; diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/booleanEquality.sql b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/booleanEquality.sql new file mode 100644 index 0000000000000..442f2355f8e3a --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/booleanEquality.sql @@ -0,0 +1,122 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +CREATE TEMPORARY VIEW t AS SELECT 1; + +SELECT true = cast(1 as tinyint) FROM t; +SELECT true = cast(1 as smallint) FROM t; +SELECT true = cast(1 as int) FROM t; +SELECT true = cast(1 as bigint) FROM t; +SELECT true = cast(1 as float) FROM t; +SELECT true = cast(1 as double) FROM t; +SELECT true = cast(1 as decimal(10, 0)) FROM t; +SELECT true = cast(1 as string) FROM t; +SELECT true = cast('1' as binary) FROM t; +SELECT true = cast(1 as boolean) FROM t; +SELECT true = cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT true = cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT true <=> cast(1 as tinyint) FROM t; +SELECT true <=> cast(1 as smallint) FROM t; +SELECT true <=> cast(1 as int) FROM t; +SELECT true <=> cast(1 as bigint) FROM t; +SELECT true <=> cast(1 as float) FROM t; +SELECT true <=> cast(1 as double) FROM t; +SELECT true <=> cast(1 as decimal(10, 0)) FROM t; +SELECT true <=> cast(1 as string) FROM t; +SELECT true <=> cast('1' as binary) FROM t; +SELECT true <=> cast(1 as boolean) FROM t; +SELECT true <=> cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT true <=> cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as tinyint) = true FROM t; +SELECT cast(1 as smallint) = true FROM t; +SELECT cast(1 as int) = true FROM t; +SELECT cast(1 as bigint) = true FROM t; +SELECT cast(1 as float) = true FROM t; +SELECT cast(1 as double) = true FROM t; +SELECT cast(1 as decimal(10, 0)) = true FROM t; +SELECT cast(1 as string) = true FROM t; +SELECT cast('1' as binary) = true FROM t; +SELECT cast(1 as boolean) = true FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) = true FROM t; +SELECT cast('2017-12-11 09:30:00' as date) = true FROM t; + +SELECT cast(1 as tinyint) <=> true FROM t; +SELECT cast(1 as smallint) <=> true FROM t; +SELECT cast(1 as int) <=> true FROM t; +SELECT cast(1 as bigint) <=> true FROM t; +SELECT cast(1 as float) <=> true FROM t; +SELECT cast(1 as double) <=> true FROM t; +SELECT cast(1 as decimal(10, 0)) <=> true FROM t; +SELECT cast(1 as string) <=> true FROM 
t; +SELECT cast('1' as binary) <=> true FROM t; +SELECT cast(1 as boolean) <=> true FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <=> true FROM t; +SELECT cast('2017-12-11 09:30:00' as date) <=> true FROM t; + +SELECT false = cast(0 as tinyint) FROM t; +SELECT false = cast(0 as smallint) FROM t; +SELECT false = cast(0 as int) FROM t; +SELECT false = cast(0 as bigint) FROM t; +SELECT false = cast(0 as float) FROM t; +SELECT false = cast(0 as double) FROM t; +SELECT false = cast(0 as decimal(10, 0)) FROM t; +SELECT false = cast(0 as string) FROM t; +SELECT false = cast('0' as binary) FROM t; +SELECT false = cast(0 as boolean) FROM t; +SELECT false = cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT false = cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT false <=> cast(0 as tinyint) FROM t; +SELECT false <=> cast(0 as smallint) FROM t; +SELECT false <=> cast(0 as int) FROM t; +SELECT false <=> cast(0 as bigint) FROM t; +SELECT false <=> cast(0 as float) FROM t; +SELECT false <=> cast(0 as double) FROM t; +SELECT false <=> cast(0 as decimal(10, 0)) FROM t; +SELECT false <=> cast(0 as string) FROM t; +SELECT false <=> cast('0' as binary) FROM t; +SELECT false <=> cast(0 as boolean) FROM t; +SELECT false <=> cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT false <=> cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(0 as tinyint) = false FROM t; +SELECT cast(0 as smallint) = false FROM t; +SELECT cast(0 as int) = false FROM t; +SELECT cast(0 as bigint) = false FROM t; +SELECT cast(0 as float) = false FROM t; +SELECT cast(0 as double) = false FROM t; +SELECT cast(0 as decimal(10, 0)) = false FROM t; +SELECT cast(0 as string) = false FROM t; +SELECT cast('0' as binary) = false FROM t; +SELECT cast(0 as boolean) = false FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) = false FROM t; +SELECT cast('2017-12-11 09:30:00' as date) = false FROM t; + +SELECT cast(0 as tinyint) <=> false FROM t; +SELECT cast(0 as smallint) <=> false FROM t; +SELECT cast(0 as int) <=> false FROM t; +SELECT cast(0 as bigint) <=> false FROM t; +SELECT cast(0 as float) <=> false FROM t; +SELECT cast(0 as double) <=> false FROM t; +SELECT cast(0 as decimal(10, 0)) <=> false FROM t; +SELECT cast(0 as string) <=> false FROM t; +SELECT cast('0' as binary) <=> false FROM t; +SELECT cast(0 as boolean) <=> false FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <=> false FROM t; +SELECT cast('2017-12-11 09:30:00' as date) <=> false FROM t; diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/caseWhenCoercion.sql b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/caseWhenCoercion.sql new file mode 100644 index 0000000000000..a780529fdca8c --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/caseWhenCoercion.sql @@ -0,0 +1,174 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. 
You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +CREATE TEMPORARY VIEW t AS SELECT 1; + +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as tinyint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as smallint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as int) END FROM t; +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as bigint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as float) END FROM t; +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as double) END FROM t; +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as decimal(10, 0)) END FROM t; +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as string) END FROM t; +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast('2' as binary) END FROM t; +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as boolean) END FROM t; +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t; +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast('2017-12-11 09:30:00' as date) END FROM t; + +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as tinyint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as smallint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as int) END FROM t; +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as bigint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as float) END FROM t; +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as double) END FROM t; +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as decimal(10, 0)) END FROM t; +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as string) END FROM t; +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast('2' as binary) END FROM t; +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as boolean) END FROM t; +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t; +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast('2017-12-11 09:30:00' as date) END FROM t; + +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as tinyint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as smallint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as int) END FROM t; +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as bigint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as float) END FROM t; +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as double) END FROM t; +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as decimal(10, 0)) END FROM t; +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as string) END FROM t; +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast('2' as binary) END FROM t; +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as boolean) END FROM t; +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t; +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast('2017-12-11 09:30:00' as date) END FROM t; + 
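+-- A worked sketch of the branch-type widening these CASE WHEN cases are expected
+-- to pin down (results assumed here for illustration only; the checked answers
+-- live in the generated golden result files):
+--   CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as int) END     -- branches widen to int, returns 1
+--   CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as double) END  -- branches widen to double, returns 1.0
+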
+SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as tinyint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as smallint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as int) END FROM t; +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as bigint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as float) END FROM t; +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as double) END FROM t; +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as decimal(10, 0)) END FROM t; +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as string) END FROM t; +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast('2' as binary) END FROM t; +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as boolean) END FROM t; +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t; +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast('2017-12-11 09:30:00' as date) END FROM t; + +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as tinyint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as smallint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as int) END FROM t; +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as bigint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as float) END FROM t; +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as double) END FROM t; +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as decimal(10, 0)) END FROM t; +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as string) END FROM t; +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast('2' as binary) END FROM t; +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as boolean) END FROM t; +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t; +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast('2017-12-11 09:30:00' as date) END FROM t; + +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as tinyint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as smallint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as int) END FROM t; +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as bigint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as float) END FROM t; +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as double) END FROM t; +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as decimal(10, 0)) END FROM t; +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as string) END FROM t; +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast('2' as binary) END FROM t; +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as boolean) END FROM t; +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t; +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast('2017-12-11 09:30:00' as date) END FROM t; + +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as tinyint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as smallint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as int) END FROM t; +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as bigint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as float) END FROM t; +SELECT CASE WHEN true THEN cast(1 as 
decimal(10, 0)) ELSE cast(2 as double) END FROM t; +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as decimal(10, 0)) END FROM t; +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as string) END FROM t; +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast('2' as binary) END FROM t; +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as boolean) END FROM t; +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t; +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast('2017-12-11 09:30:00' as date) END FROM t; + +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as tinyint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as smallint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as int) END FROM t; +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as bigint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as float) END FROM t; +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as double) END FROM t; +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as decimal(10, 0)) END FROM t; +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as string) END FROM t; +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast('2' as binary) END FROM t; +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as boolean) END FROM t; +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t; +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast('2017-12-11 09:30:00' as date) END FROM t; + +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as tinyint) END FROM t; +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as smallint) END FROM t; +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as int) END FROM t; +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as bigint) END FROM t; +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as float) END FROM t; +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as double) END FROM t; +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as decimal(10, 0)) END FROM t; +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as string) END FROM t; +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast('2' as binary) END FROM t; +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as boolean) END FROM t; +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t; +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast('2017-12-11 09:30:00' as date) END FROM t; + +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as tinyint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as smallint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as int) END FROM t; +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as bigint) END FROM t; +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as float) END FROM t; +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as double) END FROM t; +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as decimal(10, 0)) END FROM t; +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as string) END FROM t; +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast('2' as binary) END FROM t; +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as boolean) END FROM t; 
+SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t; +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast('2017-12-11 09:30:00' as date) END FROM t; + +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as tinyint) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as smallint) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as int) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as bigint) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as float) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as double) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as decimal(10, 0)) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as string) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast('2' as binary) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as boolean) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast('2017-12-11 09:30:00' as date) END FROM t; + +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as tinyint) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as smallint) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as int) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as bigint) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as float) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as double) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as decimal(10, 0)) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as string) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast('2' as binary) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as boolean) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t; +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast('2017-12-11 09:30:00' as date) END FROM t; diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/concat.sql b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/concat.sql new file mode 100644 index 0000000000000..0beebec5702fd --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/concat.sql @@ -0,0 +1,93 @@ +-- Concatenate mixed inputs (output type is string) +SELECT (col1 || col2 || col3) col +FROM ( + SELECT + id col1, + string(id + 1) col2, + encode(string(id + 2), 'utf-8') col3 + FROM range(10) +); + +SELECT ((col1 || col2) || (col3 || col4) || col5) col +FROM ( + SELECT + 'prefix_' col1, + id col2, + string(id + 1) col3, + encode(string(id + 2), 'utf-8') col4, + CAST(id AS DOUBLE) col5 + FROM range(10) +); + +SELECT ((col1 || col2) || 
(col3 || col4)) col +FROM ( + SELECT + string(id) col1, + string(id + 1) col2, + encode(string(id + 2), 'utf-8') col3, + encode(string(id + 3), 'utf-8') col4 + FROM range(10) +); + +-- turn on concatBinaryAsString +set spark.sql.function.concatBinaryAsString=true; + +SELECT (col1 || col2) col +FROM ( + SELECT + encode(string(id), 'utf-8') col1, + encode(string(id + 1), 'utf-8') col2 + FROM range(10) +); + +SELECT (col1 || col2 || col3 || col4) col +FROM ( + SELECT + encode(string(id), 'utf-8') col1, + encode(string(id + 1), 'utf-8') col2, + encode(string(id + 2), 'utf-8') col3, + encode(string(id + 3), 'utf-8') col4 + FROM range(10) +); + +SELECT ((col1 || col2) || (col3 || col4)) col +FROM ( + SELECT + encode(string(id), 'utf-8') col1, + encode(string(id + 1), 'utf-8') col2, + encode(string(id + 2), 'utf-8') col3, + encode(string(id + 3), 'utf-8') col4 + FROM range(10) +); + +-- turn off concatBinaryAsString +set spark.sql.function.concatBinaryAsString=false; + +-- Concatenate binary inputs (output type is binary) +SELECT (col1 || col2) col +FROM ( + SELECT + encode(string(id), 'utf-8') col1, + encode(string(id + 1), 'utf-8') col2 + FROM range(10) +); + +SELECT (col1 || col2 || col3 || col4) col +FROM ( + SELECT + encode(string(id), 'utf-8') col1, + encode(string(id + 1), 'utf-8') col2, + encode(string(id + 2), 'utf-8') col3, + encode(string(id + 3), 'utf-8') col4 + FROM range(10) +); + +SELECT ((col1 || col2) || (col3 || col4)) col +FROM ( + SELECT + encode(string(id), 'utf-8') col1, + encode(string(id + 1), 'utf-8') col2, + encode(string(id + 2), 'utf-8') col3, + encode(string(id + 3), 'utf-8') col4 + FROM range(10) +); diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/dateTimeOperations.sql b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/dateTimeOperations.sql new file mode 100644 index 0000000000000..1e98221867965 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/dateTimeOperations.sql @@ -0,0 +1,60 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+-- + +CREATE TEMPORARY VIEW t AS SELECT 1; + +select cast(1 as tinyint) + interval 2 day; +select cast(1 as smallint) + interval 2 day; +select cast(1 as int) + interval 2 day; +select cast(1 as bigint) + interval 2 day; +select cast(1 as float) + interval 2 day; +select cast(1 as double) + interval 2 day; +select cast(1 as decimal(10, 0)) + interval 2 day; +select cast('2017-12-11' as string) + interval 2 day; +select cast('2017-12-11 09:30:00' as string) + interval 2 day; +select cast('1' as binary) + interval 2 day; +select cast(1 as boolean) + interval 2 day; +select cast('2017-12-11 09:30:00.0' as timestamp) + interval 2 day; +select cast('2017-12-11 09:30:00' as date) + interval 2 day; + +select interval 2 day + cast(1 as tinyint); +select interval 2 day + cast(1 as smallint); +select interval 2 day + cast(1 as int); +select interval 2 day + cast(1 as bigint); +select interval 2 day + cast(1 as float); +select interval 2 day + cast(1 as double); +select interval 2 day + cast(1 as decimal(10, 0)); +select interval 2 day + cast('2017-12-11' as string); +select interval 2 day + cast('2017-12-11 09:30:00' as string); +select interval 2 day + cast('1' as binary); +select interval 2 day + cast(1 as boolean); +select interval 2 day + cast('2017-12-11 09:30:00.0' as timestamp); +select interval 2 day + cast('2017-12-11 09:30:00' as date); + +select cast(1 as tinyint) - interval 2 day; +select cast(1 as smallint) - interval 2 day; +select cast(1 as int) - interval 2 day; +select cast(1 as bigint) - interval 2 day; +select cast(1 as float) - interval 2 day; +select cast(1 as double) - interval 2 day; +select cast(1 as decimal(10, 0)) - interval 2 day; +select cast('2017-12-11' as string) - interval 2 day; +select cast('2017-12-11 09:30:00' as string) - interval 2 day; +select cast('1' as binary) - interval 2 day; +select cast(1 as boolean) - interval 2 day; +select cast('2017-12-11 09:30:00.0' as timestamp) - interval 2 day; +select cast('2017-12-11 09:30:00' as date) - interval 2 day; diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/decimalArithmeticOperations.sql b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/decimalArithmeticOperations.sql new file mode 100644 index 0000000000000..c8e108ac2c45e --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/decimalArithmeticOperations.sql @@ -0,0 +1,33 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+-- + +CREATE TEMPORARY VIEW t AS SELECT 1.0 as a, 0.0 as b; + +-- division, remainder and pmod by 0 return NULL +select a / b from t; +select a % b from t; +select pmod(a, b) from t; + +-- arithmetic operations causing an overflow return NULL +select (5e36 + 0.1) + 5e36; +select (-4e36 - 0.1) - 7e36; +select 12345678901234567890.0 * 12345678901234567890.0; +select 1e35 / 0.1; + +-- arithmetic operations causing a precision loss return NULL +select 123456789123456789.1234567890 * 1.123456789123456789; +select 0.001 / 9876543210987654321098765432109876543.2; diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/decimalPrecision.sql b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/decimalPrecision.sql new file mode 100644 index 0000000000000..8b04864b18ce3 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/decimalPrecision.sql @@ -0,0 +1,1448 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements.  See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License.  You may obtain a copy of the License at +-- +--    http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +CREATE TEMPORARY VIEW t AS SELECT 1; + +SELECT cast(1 as tinyint) + cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as tinyint) + cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as tinyint) + cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as tinyint) + cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as smallint) + cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as smallint) + cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as smallint) + cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as smallint) + cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as int) + cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as int) + cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as int) + cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as int) + cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as bigint) + cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as bigint) + cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as bigint) + cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as bigint) + cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as float) + cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as float) + cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as float) + cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as float) + cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as double) + cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as double) + cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as double) + cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as double) + cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(10, 0)) + cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) + cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) + cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) + cast(1 as decimal(20, 0)) FROM t; +
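+-- Expected result types for the decimal/decimal additions above, as a sketch
+-- (assumed here from the usual decimal precision rules: scale = max(s1, s2) and
+-- precision = max(p1 - s1, p2 - s2) + max(s1, s2) + 1):
+--   decimal(10, 0) + decimal(3, 0)  -> decimal(11, 0)
+--   decimal(10, 0) + decimal(20, 0) -> decimal(21, 0)
+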
+SELECT cast('1' as binary) + cast(1 as decimal(3, 0)) FROM t; +SELECT cast('1' as binary) + cast(1 as decimal(5, 0)) FROM t; +SELECT cast('1' as binary) + cast(1 as decimal(10, 0)) FROM t; +SELECT cast('1' as binary) + cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00.0' as timestamp) + cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) + cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) + cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) + cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00' as date) + cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) + cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) + cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) + cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) + cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(5, 0)) + cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(10, 0)) + cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(20, 0)) + cast(1 as tinyint) FROM t; + +SELECT cast(1 as decimal(3, 0)) + cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(5, 0)) + cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(10, 0)) + cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(20, 0)) + cast(1 as smallint) FROM t; + +SELECT cast(1 as decimal(3, 0)) + cast(1 as int) FROM t; +SELECT cast(1 as decimal(5, 0)) + cast(1 as int) FROM t; +SELECT cast(1 as decimal(10, 0)) + cast(1 as int) FROM t; +SELECT cast(1 as decimal(20, 0)) + cast(1 as int) FROM t; + +SELECT cast(1 as decimal(3, 0)) + cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(5, 0)) + cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(10, 0)) + cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(20, 0)) + cast(1 as bigint) FROM t; + +SELECT cast(1 as decimal(3, 0)) + cast(1 as float) FROM t; +SELECT cast(1 as decimal(5, 0)) + cast(1 as float) FROM t; +SELECT cast(1 as decimal(10, 0)) + cast(1 as float) FROM t; +SELECT cast(1 as decimal(20, 0)) + cast(1 as float) FROM t; + +SELECT cast(1 as decimal(3, 0)) + cast(1 as double) FROM t; +SELECT cast(1 as decimal(5, 0)) + cast(1 as double) FROM t; +SELECT cast(1 as decimal(10, 0)) + cast(1 as double) FROM t; +SELECT cast(1 as decimal(20, 0)) + cast(1 as double) FROM t; + +SELECT cast(1 as decimal(3, 0)) + cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(5, 0)) + cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) + cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(20, 0)) + cast(1 as decimal(10, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) + cast(1 as string) FROM t; +SELECT cast(1 as decimal(5, 0)) + cast(1 as string) FROM t; +SELECT cast(1 as decimal(10, 0)) + cast(1 as string) FROM t; +SELECT cast(1 as decimal(20, 0)) + cast(1 as string) FROM t; + +SELECT cast(1 as decimal(3, 0)) + cast('1' as binary) FROM t; +SELECT cast(1 as decimal(5, 0)) + cast('1' as binary) FROM t; +SELECT cast(1 as decimal(10, 0)) + cast('1' as binary) FROM t; +SELECT cast(1 as decimal(20, 0)) + cast('1' as binary) FROM t; + +SELECT cast(1 as decimal(3, 0)) + cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(5, 0)) + cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(10, 0)) + cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(20, 0)) + cast(1 as boolean) FROM t; + +SELECT cast(1 as decimal(3, 0)) + cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as 
decimal(5, 0)) + cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(10, 0)) + cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(20, 0)) + cast('2017-12-11 09:30:00.0' as timestamp) FROM t; + +SELECT cast(1 as decimal(3, 0)) + cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(5, 0)) + cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(10, 0)) + cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(20, 0)) + cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as tinyint) - cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as tinyint) - cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as tinyint) - cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as tinyint) - cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as smallint) - cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as smallint) - cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as smallint) - cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as smallint) - cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as int) - cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as int) - cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as int) - cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as int) - cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as bigint) - cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as bigint) - cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as bigint) - cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as bigint) - cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as float) - cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as float) - cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as float) - cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as float) - cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as double) - cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as double) - cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as double) - cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as double) - cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(10, 0)) - cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) - cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) - cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) - cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('1' as binary) - cast(1 as decimal(3, 0)) FROM t; +SELECT cast('1' as binary) - cast(1 as decimal(5, 0)) FROM t; +SELECT cast('1' as binary) - cast(1 as decimal(10, 0)) FROM t; +SELECT cast('1' as binary) - cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00.0' as timestamp) - cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) - cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) - cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) - cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00' as date) - cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) - cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) - cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) - cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) - cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(5, 0)) - cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(10, 0)) - cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(20, 0)) - cast(1 as tinyint) FROM t; + +SELECT cast(1 as decimal(3, 0)) - cast(1 as 
smallint) FROM t; +SELECT cast(1 as decimal(5, 0)) - cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(10, 0)) - cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(20, 0)) - cast(1 as smallint) FROM t; + +SELECT cast(1 as decimal(3, 0)) - cast(1 as int) FROM t; +SELECT cast(1 as decimal(5, 0)) - cast(1 as int) FROM t; +SELECT cast(1 as decimal(10, 0)) - cast(1 as int) FROM t; +SELECT cast(1 as decimal(20, 0)) - cast(1 as int) FROM t; + +SELECT cast(1 as decimal(3, 0)) - cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(5, 0)) - cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(10, 0)) - cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(20, 0)) - cast(1 as bigint) FROM t; + +SELECT cast(1 as decimal(3, 0)) - cast(1 as float) FROM t; +SELECT cast(1 as decimal(5, 0)) - cast(1 as float) FROM t; +SELECT cast(1 as decimal(10, 0)) - cast(1 as float) FROM t; +SELECT cast(1 as decimal(20, 0)) - cast(1 as float) FROM t; + +SELECT cast(1 as decimal(3, 0)) - cast(1 as double) FROM t; +SELECT cast(1 as decimal(5, 0)) - cast(1 as double) FROM t; +SELECT cast(1 as decimal(10, 0)) - cast(1 as double) FROM t; +SELECT cast(1 as decimal(20, 0)) - cast(1 as double) FROM t; + +SELECT cast(1 as decimal(3, 0)) - cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(5, 0)) - cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) - cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(20, 0)) - cast(1 as decimal(10, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) - cast(1 as string) FROM t; +SELECT cast(1 as decimal(5, 0)) - cast(1 as string) FROM t; +SELECT cast(1 as decimal(10, 0)) - cast(1 as string) FROM t; +SELECT cast(1 as decimal(20, 0)) - cast(1 as string) FROM t; + +SELECT cast(1 as decimal(3, 0)) - cast('1' as binary) FROM t; +SELECT cast(1 as decimal(5, 0)) - cast('1' as binary) FROM t; +SELECT cast(1 as decimal(10, 0)) - cast('1' as binary) FROM t; +SELECT cast(1 as decimal(20, 0)) - cast('1' as binary) FROM t; + +SELECT cast(1 as decimal(3, 0)) - cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(5, 0)) - cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(10, 0)) - cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(20, 0)) - cast(1 as boolean) FROM t; + +SELECT cast(1 as decimal(3, 0)) - cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(5, 0)) - cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(10, 0)) - cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(20, 0)) - cast('2017-12-11 09:30:00.0' as timestamp) FROM t; + +SELECT cast(1 as decimal(3, 0)) - cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(5, 0)) - cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(10, 0)) - cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(20, 0)) - cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as tinyint) * cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as tinyint) * cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as tinyint) * cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as tinyint) * cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as smallint) * cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as smallint) * cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as smallint) * cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as smallint) * cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as int) * cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as int) * cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as int) * cast(1 as 
decimal(10, 0)) FROM t; +SELECT cast(1 as int) * cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as bigint) * cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as bigint) * cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as bigint) * cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as bigint) * cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as float) * cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as float) * cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as float) * cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as float) * cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as double) * cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as double) * cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as double) * cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as double) * cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(10, 0)) * cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) * cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) * cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) * cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('1' as binary) * cast(1 as decimal(3, 0)) FROM t; +SELECT cast('1' as binary) * cast(1 as decimal(5, 0)) FROM t; +SELECT cast('1' as binary) * cast(1 as decimal(10, 0)) FROM t; +SELECT cast('1' as binary) * cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00.0' as timestamp) * cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) * cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) * cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) * cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00' as date) * cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) * cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) * cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) * cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) * cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(5, 0)) * cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(10, 0)) * cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(20, 0)) * cast(1 as tinyint) FROM t; + +SELECT cast(1 as decimal(3, 0)) * cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(5, 0)) * cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(10, 0)) * cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(20, 0)) * cast(1 as smallint) FROM t; + +SELECT cast(1 as decimal(3, 0)) * cast(1 as int) FROM t; +SELECT cast(1 as decimal(5, 0)) * cast(1 as int) FROM t; +SELECT cast(1 as decimal(10, 0)) * cast(1 as int) FROM t; +SELECT cast(1 as decimal(20, 0)) * cast(1 as int) FROM t; + +SELECT cast(1 as decimal(3, 0)) * cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(5, 0)) * cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(10, 0)) * cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(20, 0)) * cast(1 as bigint) FROM t; + +SELECT cast(1 as decimal(3, 0)) * cast(1 as float) FROM t; +SELECT cast(1 as decimal(5, 0)) * cast(1 as float) FROM t; +SELECT cast(1 as decimal(10, 0)) * cast(1 as float) FROM t; +SELECT cast(1 as decimal(20, 0)) * cast(1 as float) FROM t; + +SELECT cast(1 as decimal(3, 0)) * cast(1 as double) FROM t; +SELECT cast(1 as decimal(5, 0)) * cast(1 as double) FROM t; +SELECT cast(1 as decimal(10, 0)) * cast(1 as double) FROM t; +SELECT cast(1 as decimal(20, 0)) * cast(1 as double) FROM t; + +SELECT cast(1 as decimal(3, 0)) * cast(1
as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(5, 0)) * cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) * cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(20, 0)) * cast(1 as decimal(10, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) * cast(1 as string) FROM t; +SELECT cast(1 as decimal(5, 0)) * cast(1 as string) FROM t; +SELECT cast(1 as decimal(10, 0)) * cast(1 as string) FROM t; +SELECT cast(1 as decimal(20, 0)) * cast(1 as string) FROM t; + +SELECT cast(1 as decimal(3, 0)) * cast('1' as binary) FROM t; +SELECT cast(1 as decimal(5, 0)) * cast('1' as binary) FROM t; +SELECT cast(1 as decimal(10, 0)) * cast('1' as binary) FROM t; +SELECT cast(1 as decimal(20, 0)) * cast('1' as binary) FROM t; + +SELECT cast(1 as decimal(3, 0)) * cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(5, 0)) * cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(10, 0)) * cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(20, 0)) * cast(1 as boolean) FROM t; + +SELECT cast(1 as decimal(3, 0)) * cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(5, 0)) * cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(10, 0)) * cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(20, 0)) * cast('2017-12-11 09:30:00.0' as timestamp) FROM t; + +SELECT cast(1 as decimal(3, 0)) * cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(5, 0)) * cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(10, 0)) * cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(20, 0)) * cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as tinyint) / cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as tinyint) / cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as tinyint) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as tinyint) / cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as smallint) / cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as smallint) / cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as smallint) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as smallint) / cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as int) / cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as int) / cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as int) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as int) / cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as bigint) / cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as bigint) / cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as bigint) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as bigint) / cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as float) / cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as float) / cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as float) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as float) / cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as double) / cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as double) / cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as double) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as double) / cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(10, 0)) / cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('1' as binary) / cast(1 as decimal(3, 0)) FROM t; +SELECT cast('1' as binary) / cast(1 as decimal(5, 0)) FROM t;
+SELECT cast('1' as binary) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast('1' as binary) / cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) / cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(5, 0)) / cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(20, 0)) / cast(1 as tinyint) FROM t; + +SELECT cast(1 as decimal(3, 0)) / cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(5, 0)) / cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(20, 0)) / cast(1 as smallint) FROM t; + +SELECT cast(1 as decimal(3, 0)) / cast(1 as int) FROM t; +SELECT cast(1 as decimal(5, 0)) / cast(1 as int) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast(1 as int) FROM t; +SELECT cast(1 as decimal(20, 0)) / cast(1 as int) FROM t; + +SELECT cast(1 as decimal(3, 0)) / cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(5, 0)) / cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(20, 0)) / cast(1 as bigint) FROM t; + +SELECT cast(1 as decimal(3, 0)) / cast(1 as float) FROM t; +SELECT cast(1 as decimal(5, 0)) / cast(1 as float) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast(1 as float) FROM t; +SELECT cast(1 as decimal(20, 0)) / cast(1 as float) FROM t; + +SELECT cast(1 as decimal(3, 0)) / cast(1 as double) FROM t; +SELECT cast(1 as decimal(5, 0)) / cast(1 as double) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast(1 as double) FROM t; +SELECT cast(1 as decimal(20, 0)) / cast(1 as double) FROM t; + +SELECT cast(1 as decimal(3, 0)) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(5, 0)) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(20, 0)) / cast(1 as decimal(10, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) / cast(1 as string) FROM t; +SELECT cast(1 as decimal(5, 0)) / cast(1 as string) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast(1 as string) FROM t; +SELECT cast(1 as decimal(20, 0)) / cast(1 as string) FROM t; + +SELECT cast(1 as decimal(3, 0)) / cast('1' as binary) FROM t; +SELECT cast(1 as decimal(5, 0)) / cast('1' as binary) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast('1' as binary) FROM t; +SELECT cast(1 as decimal(20, 0)) / cast('1' as binary) FROM t; + +SELECT cast(1 as decimal(3, 0)) / cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(5, 0)) / cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(20, 0)) / cast(1 as boolean) FROM t; + +SELECT cast(1 as decimal(3, 0)) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(5, 0)) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast('2017-12-11 09:30:00.0'
as timestamp) FROM t; +SELECT cast(1 as decimal(20, 0)) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t; + +SELECT cast(1 as decimal(3, 0)) / cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(5, 0)) / cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(20, 0)) / cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as tinyint) % cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as tinyint) % cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as tinyint) % cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as tinyint) % cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as smallint) % cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as smallint) % cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as smallint) % cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as smallint) % cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as int) % cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as int) % cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as int) % cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as int) % cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as bigint) % cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as bigint) % cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as bigint) % cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as bigint) % cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as float) % cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as float) % cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as float) % cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as float) % cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as double) % cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as double) % cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as double) % cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as double) % cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(10, 0)) % cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) % cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) % cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) % cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('1' as binary) % cast(1 as decimal(3, 0)) FROM t; +SELECT cast('1' as binary) % cast(1 as decimal(5, 0)) FROM t; +SELECT cast('1' as binary) % cast(1 as decimal(10, 0)) FROM t; +SELECT cast('1' as binary) % cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00.0' as timestamp) % cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) % cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) % cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) % cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00' as date) % cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) % cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) % cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) % cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) % cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(5, 0)) % cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(10, 0)) % cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(20, 0)) % cast(1 as tinyint) FROM t; + +SELECT cast(1 as decimal(3, 0)) % cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(5, 0)) % cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(10, 0)) % cast(1 as smallint)
FROM t; +SELECT cast(1 as decimal(20, 0)) % cast(1 as smallint) FROM t; + +SELECT cast(1 as decimal(3, 0)) % cast(1 as int) FROM t; +SELECT cast(1 as decimal(5, 0)) % cast(1 as int) FROM t; +SELECT cast(1 as decimal(10, 0)) % cast(1 as int) FROM t; +SELECT cast(1 as decimal(20, 0)) % cast(1 as int) FROM t; + +SELECT cast(1 as decimal(3, 0)) % cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(5, 0)) % cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(10, 0)) % cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(20, 0)) % cast(1 as bigint) FROM t; + +SELECT cast(1 as decimal(3, 0)) % cast(1 as float) FROM t; +SELECT cast(1 as decimal(5, 0)) % cast(1 as float) FROM t; +SELECT cast(1 as decimal(10, 0)) % cast(1 as float) FROM t; +SELECT cast(1 as decimal(20, 0)) % cast(1 as float) FROM t; + +SELECT cast(1 as decimal(3, 0)) % cast(1 as double) FROM t; +SELECT cast(1 as decimal(5, 0)) % cast(1 as double) FROM t; +SELECT cast(1 as decimal(10, 0)) % cast(1 as double) FROM t; +SELECT cast(1 as decimal(20, 0)) % cast(1 as double) FROM t; + +SELECT cast(1 as decimal(3, 0)) % cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(5, 0)) % cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) % cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(20, 0)) % cast(1 as decimal(10, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) % cast(1 as string) FROM t; +SELECT cast(1 as decimal(5, 0)) % cast(1 as string) FROM t; +SELECT cast(1 as decimal(10, 0)) % cast(1 as string) FROM t; +SELECT cast(1 as decimal(20, 0)) % cast(1 as string) FROM t; + +SELECT cast(1 as decimal(3, 0)) % cast('1' as binary) FROM t; +SELECT cast(1 as decimal(5, 0)) % cast('1' as binary) FROM t; +SELECT cast(1 as decimal(10, 0)) % cast('1' as binary) FROM t; +SELECT cast(1 as decimal(20, 0)) % cast('1' as binary) FROM t; + +SELECT cast(1 as decimal(3, 0)) % cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(5, 0)) % cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(10, 0)) % cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(20, 0)) % cast(1 as boolean) FROM t; + +SELECT cast(1 as decimal(3, 0)) % cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(5, 0)) % cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(10, 0)) % cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(20, 0)) % cast('2017-12-11 09:30:00.0' as timestamp) FROM t; + +SELECT cast(1 as decimal(3, 0)) % cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(5, 0)) % cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(10, 0)) % cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(20, 0)) % cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT pmod(cast(1 as tinyint), cast(1 as decimal(3, 0))) FROM t; +SELECT pmod(cast(1 as tinyint), cast(1 as decimal(5, 0))) FROM t; +SELECT pmod(cast(1 as tinyint), cast(1 as decimal(10, 0))) FROM t; +SELECT pmod(cast(1 as tinyint), cast(1 as decimal(20, 0))) FROM t; + +SELECT pmod(cast(1 as smallint), cast(1 as decimal(3, 0))) FROM t; +SELECT pmod(cast(1 as smallint), cast(1 as decimal(5, 0))) FROM t; +SELECT pmod(cast(1 as smallint), cast(1 as decimal(10, 0))) FROM t; +SELECT pmod(cast(1 as smallint), cast(1 as decimal(20, 0))) FROM t; + +SELECT pmod(cast(1 as int), cast(1 as decimal(3, 0))) FROM t; +SELECT pmod(cast(1 as int), cast(1 as decimal(5, 0))) FROM t; +SELECT pmod(cast(1 as int), cast(1 as decimal(10, 0))) FROM t; +SELECT pmod(cast(1 as int), cast(1 as decimal(20, 0))) FROM 
t; + +SELECT pmod(cast(1 as bigint), cast(1 as decimal(3, 0))) FROM t; +SELECT pmod(cast(1 as bigint), cast(1 as decimal(5, 0))) FROM t; +SELECT pmod(cast(1 as bigint), cast(1 as decimal(10, 0))) FROM t; +SELECT pmod(cast(1 as bigint), cast(1 as decimal(20, 0))) FROM t; + +SELECT pmod(cast(1 as float), cast(1 as decimal(3, 0))) FROM t; +SELECT pmod(cast(1 as float), cast(1 as decimal(5, 0))) FROM t; +SELECT pmod(cast(1 as float), cast(1 as decimal(10, 0))) FROM t; +SELECT pmod(cast(1 as float), cast(1 as decimal(20, 0))) FROM t; + +SELECT pmod(cast(1 as double), cast(1 as decimal(3, 0))) FROM t; +SELECT pmod(cast(1 as double), cast(1 as decimal(5, 0))) FROM t; +SELECT pmod(cast(1 as double), cast(1 as decimal(10, 0))) FROM t; +SELECT pmod(cast(1 as double), cast(1 as decimal(20, 0))) FROM t; + +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as decimal(3, 0))) FROM t; +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as decimal(5, 0))) FROM t; +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as decimal(10, 0))) FROM t; +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as decimal(20, 0))) FROM t; + +SELECT pmod(cast('1' as binary), cast(1 as decimal(3, 0))) FROM t; +SELECT pmod(cast('1' as binary), cast(1 as decimal(5, 0))) FROM t; +SELECT pmod(cast('1' as binary), cast(1 as decimal(10, 0))) FROM t; +SELECT pmod(cast('1' as binary), cast(1 as decimal(20, 0))) FROM t; + +SELECT pmod(cast('2017-12-11 09:30:00.0' as timestamp), cast(1 as decimal(3, 0))) FROM t; +SELECT pmod(cast('2017-12-11 09:30:00.0' as timestamp), cast(1 as decimal(5, 0))) FROM t; +SELECT pmod(cast('2017-12-11 09:30:00.0' as timestamp), cast(1 as decimal(10, 0))) FROM t; +SELECT pmod(cast('2017-12-11 09:30:00.0' as timestamp), cast(1 as decimal(20, 0))) FROM t; + +SELECT pmod(cast('2017-12-11 09:30:00' as date), cast(1 as decimal(3, 0))) FROM t; +SELECT pmod(cast('2017-12-11 09:30:00' as date), cast(1 as decimal(5, 0))) FROM t; +SELECT pmod(cast('2017-12-11 09:30:00' as date), cast(1 as decimal(10, 0))) FROM t; +SELECT pmod(cast('2017-12-11 09:30:00' as date), cast(1 as decimal(20, 0))) FROM t; + +SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as tinyint)) FROM t; +SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as tinyint)) FROM t; +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as tinyint)) FROM t; +SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as tinyint)) FROM t; + +SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as smallint)) FROM t; +SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as smallint)) FROM t; +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as smallint)) FROM t; +SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as smallint)) FROM t; + +SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as int)) FROM t; +SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as int)) FROM t; +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as int)) FROM t; +SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as int)) FROM t; + +SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as bigint)) FROM t; +SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as bigint)) FROM t; +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as bigint)) FROM t; +SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as bigint)) FROM t; + +SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as float)) FROM t; +SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as float)) FROM t; +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as float)) FROM t; +SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as float)) FROM t; + +SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as double)) FROM t; +SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as 
double)) FROM t; +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as double)) FROM t; +SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as double)) FROM t; + +SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as decimal(10, 0))) FROM t; +SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as decimal(10, 0))) FROM t; +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as decimal(10, 0))) FROM t; +SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as decimal(10, 0))) FROM t; + +SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as string)) FROM t; +SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as string)) FROM t; +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as string)) FROM t; +SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as string)) FROM t; + +SELECT pmod(cast(1 as decimal(3, 0)) , cast('1' as binary)) FROM t; +SELECT pmod(cast(1 as decimal(5, 0)) , cast('1' as binary)) FROM t; +SELECT pmod(cast(1 as decimal(10, 0)), cast('1' as binary)) FROM t; +SELECT pmod(cast(1 as decimal(20, 0)), cast('1' as binary)) FROM t; + +SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as boolean)) FROM t; +SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as boolean)) FROM t; +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as boolean)) FROM t; +SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as boolean)) FROM t; + +SELECT pmod(cast(1 as decimal(3, 0)) , cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT pmod(cast(1 as decimal(5, 0)) , cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT pmod(cast(1 as decimal(10, 0)), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT pmod(cast(1 as decimal(20, 0)), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; + +SELECT pmod(cast(1 as decimal(3, 0)) , cast('2017-12-11 09:30:00' as date)) FROM t; +SELECT pmod(cast(1 as decimal(5, 0)) , cast('2017-12-11 09:30:00' as date)) FROM t; +SELECT pmod(cast(1 as decimal(10, 0)), cast('2017-12-11 09:30:00' as date)) FROM t; +SELECT pmod(cast(1 as decimal(20, 0)), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast(1 as tinyint) = cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as tinyint) = cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as tinyint) = cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as tinyint) = cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as smallint) = cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as smallint) = cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as smallint) = cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as smallint) = cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as int) = cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as int) = cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as int) = cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as int) = cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as bigint) = cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as bigint) = cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as bigint) = cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as bigint) = cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as float) = cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as float) = cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as float) = cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as float) = cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as double) = cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as double) = cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as double) = cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as double) = cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(10, 0)) = cast(1 as decimal(3, 0)) FROM t; +SELECT 
cast(1 as decimal(10, 0)) = cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) = cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) = cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('1' as binary) = cast(1 as decimal(3, 0)) FROM t; +SELECT cast('1' as binary) = cast(1 as decimal(5, 0)) FROM t; +SELECT cast('1' as binary) = cast(1 as decimal(10, 0)) FROM t; +SELECT cast('1' as binary) = cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00.0' as timestamp) = cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) = cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) = cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) = cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00' as date) = cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) = cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) = cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) = cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) = cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(5, 0)) = cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(10, 0)) = cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(20, 0)) = cast(1 as tinyint) FROM t; + +SELECT cast(1 as decimal(3, 0)) = cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(5, 0)) = cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(10, 0)) = cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(20, 0)) = cast(1 as smallint) FROM t; + +SELECT cast(1 as decimal(3, 0)) = cast(1 as int) FROM t; +SELECT cast(1 as decimal(5, 0)) = cast(1 as int) FROM t; +SELECT cast(1 as decimal(10, 0)) = cast(1 as int) FROM t; +SELECT cast(1 as decimal(20, 0)) = cast(1 as int) FROM t; + +SELECT cast(1 as decimal(3, 0)) = cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(5, 0)) = cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(10, 0)) = cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(20, 0)) = cast(1 as bigint) FROM t; + +SELECT cast(1 as decimal(3, 0)) = cast(1 as float) FROM t; +SELECT cast(1 as decimal(5, 0)) = cast(1 as float) FROM t; +SELECT cast(1 as decimal(10, 0)) = cast(1 as float) FROM t; +SELECT cast(1 as decimal(20, 0)) = cast(1 as float) FROM t; + +SELECT cast(1 as decimal(3, 0)) = cast(1 as double) FROM t; +SELECT cast(1 as decimal(5, 0)) = cast(1 as double) FROM t; +SELECT cast(1 as decimal(10, 0)) = cast(1 as double) FROM t; +SELECT cast(1 as decimal(20, 0)) = cast(1 as double) FROM t; + +SELECT cast(1 as decimal(3, 0)) = cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(5, 0)) = cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) = cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(20, 0)) = cast(1 as decimal(10, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) = cast(1 as string) FROM t; +SELECT cast(1 as decimal(5, 0)) = cast(1 as string) FROM t; +SELECT cast(1 as decimal(10, 0)) = cast(1 as string) FROM t; +SELECT cast(1 as decimal(20, 0)) = cast(1 as string) FROM t; + +SELECT cast(1 as decimal(3, 0)) = cast('1' as binary) FROM t; +SELECT cast(1 as decimal(5, 0)) = cast('1' as binary) FROM t; +SELECT cast(1 as decimal(10, 0)) = cast('1' as binary) FROM t; +SELECT cast(1 as decimal(20, 0)) = cast('1' as binary) FROM t; + +SELECT cast(1 as decimal(3, 0)) = cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(5, 0)) = cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(10, 0)) = 
cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(20, 0)) = cast(1 as boolean) FROM t; + +SELECT cast(1 as decimal(3, 0)) = cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(5, 0)) = cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(10, 0)) = cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(20, 0)) = cast('2017-12-11 09:30:00.0' as timestamp) FROM t; + +SELECT cast(1 as decimal(3, 0)) = cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(5, 0)) = cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(10, 0)) = cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(20, 0)) = cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as tinyint) <=> cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as tinyint) <=> cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as tinyint) <=> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as tinyint) <=> cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as smallint) <=> cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as smallint) <=> cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as smallint) <=> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as smallint) <=> cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as int) <=> cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as int) <=> cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as int) <=> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as int) <=> cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as bigint) <=> cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as bigint) <=> cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as bigint) <=> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as bigint) <=> cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as float) <=> cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as float) <=> cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as float) <=> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as float) <=> cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as double) <=> cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as double) <=> cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as double) <=> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as double) <=> cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('1' as binary) <=> cast(1 as decimal(3, 0)) FROM t; +SELECT cast('1' as binary) <=> cast(1 as decimal(5, 0)) FROM t; +SELECT cast('1' as binary) <=> cast(1 as decimal(10, 0)) FROM t; +SELECT cast('1' as binary) <=> cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <=> cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <=> cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <=> cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <=> cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00' as date) <=> cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) <=> cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) <=> cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) <=> cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as 
decimal(3, 0)) <=> cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(5, 0)) <=> cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(20, 0)) <=> cast(1 as tinyint) FROM t; + +SELECT cast(1 as decimal(3, 0)) <=> cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(5, 0)) <=> cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(20, 0)) <=> cast(1 as smallint) FROM t; + +SELECT cast(1 as decimal(3, 0)) <=> cast(1 as int) FROM t; +SELECT cast(1 as decimal(5, 0)) <=> cast(1 as int) FROM t; +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as int) FROM t; +SELECT cast(1 as decimal(20, 0)) <=> cast(1 as int) FROM t; + +SELECT cast(1 as decimal(3, 0)) <=> cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(5, 0)) <=> cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(20, 0)) <=> cast(1 as bigint) FROM t; + +SELECT cast(1 as decimal(3, 0)) <=> cast(1 as float) FROM t; +SELECT cast(1 as decimal(5, 0)) <=> cast(1 as float) FROM t; +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as float) FROM t; +SELECT cast(1 as decimal(20, 0)) <=> cast(1 as float) FROM t; + +SELECT cast(1 as decimal(3, 0)) <=> cast(1 as double) FROM t; +SELECT cast(1 as decimal(5, 0)) <=> cast(1 as double) FROM t; +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as double) FROM t; +SELECT cast(1 as decimal(20, 0)) <=> cast(1 as double) FROM t; + +SELECT cast(1 as decimal(3, 0)) <=> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(5, 0)) <=> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(20, 0)) <=> cast(1 as decimal(10, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) <=> cast(1 as string) FROM t; +SELECT cast(1 as decimal(5, 0)) <=> cast(1 as string) FROM t; +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as string) FROM t; +SELECT cast(1 as decimal(20, 0)) <=> cast(1 as string) FROM t; + +SELECT cast(1 as decimal(3, 0)) <=> cast('1' as binary) FROM t; +SELECT cast(1 as decimal(5, 0)) <=> cast('1' as binary) FROM t; +SELECT cast(1 as decimal(10, 0)) <=> cast('1' as binary) FROM t; +SELECT cast(1 as decimal(20, 0)) <=> cast('1' as binary) FROM t; + +SELECT cast(1 as decimal(3, 0)) <=> cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(5, 0)) <=> cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(20, 0)) <=> cast(1 as boolean) FROM t; + +SELECT cast(1 as decimal(3, 0)) <=> cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(5, 0)) <=> cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(10, 0)) <=> cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(20, 0)) <=> cast('2017-12-11 09:30:00.0' as timestamp) FROM t; + +SELECT cast(1 as decimal(3, 0)) <=> cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(5, 0)) <=> cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(10, 0)) <=> cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(20, 0)) <=> cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as tinyint) < cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as tinyint) < cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as tinyint) < cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as tinyint) < cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as smallint) < cast(1 
as decimal(3, 0)) FROM t; +SELECT cast(1 as smallint) < cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as smallint) < cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as smallint) < cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as int) < cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as int) < cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as int) < cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as int) < cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as bigint) < cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as bigint) < cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as bigint) < cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as bigint) < cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as float) < cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as float) < cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as float) < cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as float) < cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as double) < cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as double) < cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as double) < cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as double) < cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(10, 0)) < cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) < cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) < cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) < cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('1' as binary) < cast(1 as decimal(3, 0)) FROM t; +SELECT cast('1' as binary) < cast(1 as decimal(5, 0)) FROM t; +SELECT cast('1' as binary) < cast(1 as decimal(10, 0)) FROM t; +SELECT cast('1' as binary) < cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00.0' as timestamp) < cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) < cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) < cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) < cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00' as date) < cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) < cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) < cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) < cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) < cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(5, 0)) < cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(10, 0)) < cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(20, 0)) < cast(1 as tinyint) FROM t; + +SELECT cast(1 as decimal(3, 0)) < cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(5, 0)) < cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(10, 0)) < cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(20, 0)) < cast(1 as smallint) FROM t; + +SELECT cast(1 as decimal(3, 0)) < cast(1 as int) FROM t; +SELECT cast(1 as decimal(5, 0)) < cast(1 as int) FROM t; +SELECT cast(1 as decimal(10, 0)) < cast(1 as int) FROM t; +SELECT cast(1 as decimal(20, 0)) < cast(1 as int) FROM t; + +SELECT cast(1 as decimal(3, 0)) < cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(5, 0)) < cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(10, 0)) < cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(20, 0)) < cast(1 as bigint) FROM t; + +SELECT cast(1 as decimal(3, 0)) < cast(1 as float) FROM t; +SELECT cast(1 as decimal(5, 0)) < cast(1 as float) FROM t; +SELECT cast(1 as decimal(10, 0)) < cast(1 
as float) FROM t; +SELECT cast(1 as decimal(20, 0)) < cast(1 as float) FROM t; + +SELECT cast(1 as decimal(3, 0)) < cast(1 as double) FROM t; +SELECT cast(1 as decimal(5, 0)) < cast(1 as double) FROM t; +SELECT cast(1 as decimal(10, 0)) < cast(1 as double) FROM t; +SELECT cast(1 as decimal(20, 0)) < cast(1 as double) FROM t; + +SELECT cast(1 as decimal(3, 0)) < cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(5, 0)) < cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) < cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(20, 0)) < cast(1 as decimal(10, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) < cast(1 as string) FROM t; +SELECT cast(1 as decimal(5, 0)) < cast(1 as string) FROM t; +SELECT cast(1 as decimal(10, 0)) < cast(1 as string) FROM t; +SELECT cast(1 as decimal(20, 0)) < cast(1 as string) FROM t; + +SELECT cast(1 as decimal(3, 0)) < cast('1' as binary) FROM t; +SELECT cast(1 as decimal(5, 0)) < cast('1' as binary) FROM t; +SELECT cast(1 as decimal(10, 0)) < cast('1' as binary) FROM t; +SELECT cast(1 as decimal(20, 0)) < cast('1' as binary) FROM t; + +SELECT cast(1 as decimal(3, 0)) < cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(5, 0)) < cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(10, 0)) < cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(20, 0)) < cast(1 as boolean) FROM t; + +SELECT cast(1 as decimal(3, 0)) < cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(5, 0)) < cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(10, 0)) < cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(20, 0)) < cast('2017-12-11 09:30:00.0' as timestamp) FROM t; + +SELECT cast(1 as decimal(3, 0)) < cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(5, 0)) < cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(10, 0)) < cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(20, 0)) < cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as tinyint) <= cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as tinyint) <= cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as tinyint) <= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as tinyint) <= cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as smallint) <= cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as smallint) <= cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as smallint) <= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as smallint) <= cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as int) <= cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as int) <= cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as int) <= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as int) <= cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as bigint) <= cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as bigint) <= cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as bigint) <= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as bigint) <= cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as float) <= cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as float) <= cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as float) <= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as float) <= cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as double) <= cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as double) <= cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as double) <= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as double) <= cast(1 as decimal(20, 0)) FROM t; + +SELECT 
cast(1 as decimal(10, 0)) <= cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) <= cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) <= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) <= cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('1' as binary) <= cast(1 as decimal(3, 0)) FROM t; +SELECT cast('1' as binary) <= cast(1 as decimal(5, 0)) FROM t; +SELECT cast('1' as binary) <= cast(1 as decimal(10, 0)) FROM t; +SELECT cast('1' as binary) <= cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <= cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <= cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <= cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <= cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00' as date) <= cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) <= cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) <= cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) <= cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) <= cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(5, 0)) <= cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(10, 0)) <= cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(20, 0)) <= cast(1 as tinyint) FROM t; + +SELECT cast(1 as decimal(3, 0)) <= cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(5, 0)) <= cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(10, 0)) <= cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(20, 0)) <= cast(1 as smallint) FROM t; + +SELECT cast(1 as decimal(3, 0)) <= cast(1 as int) FROM t; +SELECT cast(1 as decimal(5, 0)) <= cast(1 as int) FROM t; +SELECT cast(1 as decimal(10, 0)) <= cast(1 as int) FROM t; +SELECT cast(1 as decimal(20, 0)) <= cast(1 as int) FROM t; + +SELECT cast(1 as decimal(3, 0)) <= cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(5, 0)) <= cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(10, 0)) <= cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(20, 0)) <= cast(1 as bigint) FROM t; + +SELECT cast(1 as decimal(3, 0)) <= cast(1 as float) FROM t; +SELECT cast(1 as decimal(5, 0)) <= cast(1 as float) FROM t; +SELECT cast(1 as decimal(10, 0)) <= cast(1 as float) FROM t; +SELECT cast(1 as decimal(20, 0)) <= cast(1 as float) FROM t; + +SELECT cast(1 as decimal(3, 0)) <= cast(1 as double) FROM t; +SELECT cast(1 as decimal(5, 0)) <= cast(1 as double) FROM t; +SELECT cast(1 as decimal(10, 0)) <= cast(1 as double) FROM t; +SELECT cast(1 as decimal(20, 0)) <= cast(1 as double) FROM t; + +SELECT cast(1 as decimal(3, 0)) <= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(5, 0)) <= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) <= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(20, 0)) <= cast(1 as decimal(10, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) <= cast(1 as string) FROM t; +SELECT cast(1 as decimal(5, 0)) <= cast(1 as string) FROM t; +SELECT cast(1 as decimal(10, 0)) <= cast(1 as string) FROM t; +SELECT cast(1 as decimal(20, 0)) <= cast(1 as string) FROM t; + +SELECT cast(1 as decimal(3, 0)) <= cast('1' as binary) FROM t; +SELECT cast(1 as decimal(5, 0)) <= cast('1' as binary) FROM t; +SELECT cast(1 as decimal(10, 0)) <= cast('1' as binary) FROM t; +SELECT cast(1 as decimal(20, 0)) <= cast('1' as binary) FROM t; + +SELECT cast(1 as decimal(3, 0)) <= 
cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(5, 0)) <= cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(10, 0)) <= cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(20, 0)) <= cast(1 as boolean) FROM t; + +SELECT cast(1 as decimal(3, 0)) <= cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(5, 0)) <= cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(10, 0)) <= cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(20, 0)) <= cast('2017-12-11 09:30:00.0' as timestamp) FROM t; + +SELECT cast(1 as decimal(3, 0)) <= cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(5, 0)) <= cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(10, 0)) <= cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(20, 0)) <= cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as tinyint) > cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as tinyint) > cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as tinyint) > cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as tinyint) > cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as smallint) > cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as smallint) > cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as smallint) > cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as smallint) > cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as int) > cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as int) > cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as int) > cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as int) > cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as bigint) > cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as bigint) > cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as bigint) > cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as bigint) > cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as float) > cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as float) > cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as float) > cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as float) > cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as double) > cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as double) > cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as double) > cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as double) > cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(10, 0)) > cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) > cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) > cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) > cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('1' as binary) > cast(1 as decimal(3, 0)) FROM t; +SELECT cast('1' as binary) > cast(1 as decimal(5, 0)) FROM t; +SELECT cast('1' as binary) > cast(1 as decimal(10, 0)) FROM t; +SELECT cast('1' as binary) > cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00.0' as timestamp) > cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) > cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) > cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) > cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00' as date) > cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) > cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) > cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) > cast(1 
as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) > cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(5, 0)) > cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(10, 0)) > cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(20, 0)) > cast(1 as tinyint) FROM t; + +SELECT cast(1 as decimal(3, 0)) > cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(5, 0)) > cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(10, 0)) > cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(20, 0)) > cast(1 as smallint) FROM t; + +SELECT cast(1 as decimal(3, 0)) > cast(1 as int) FROM t; +SELECT cast(1 as decimal(5, 0)) > cast(1 as int) FROM t; +SELECT cast(1 as decimal(10, 0)) > cast(1 as int) FROM t; +SELECT cast(1 as decimal(20, 0)) > cast(1 as int) FROM t; + +SELECT cast(1 as decimal(3, 0)) > cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(5, 0)) > cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(10, 0)) > cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(20, 0)) > cast(1 as bigint) FROM t; + +SELECT cast(1 as decimal(3, 0)) > cast(1 as float) FROM t; +SELECT cast(1 as decimal(5, 0)) > cast(1 as float) FROM t; +SELECT cast(1 as decimal(10, 0)) > cast(1 as float) FROM t; +SELECT cast(1 as decimal(20, 0)) > cast(1 as float) FROM t; + +SELECT cast(1 as decimal(3, 0)) > cast(1 as double) FROM t; +SELECT cast(1 as decimal(5, 0)) > cast(1 as double) FROM t; +SELECT cast(1 as decimal(10, 0)) > cast(1 as double) FROM t; +SELECT cast(1 as decimal(20, 0)) > cast(1 as double) FROM t; + +SELECT cast(1 as decimal(3, 0)) > cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(5, 0)) > cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) > cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(20, 0)) > cast(1 as decimal(10, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) > cast(1 as string) FROM t; +SELECT cast(1 as decimal(5, 0)) > cast(1 as string) FROM t; +SELECT cast(1 as decimal(10, 0)) > cast(1 as string) FROM t; +SELECT cast(1 as decimal(20, 0)) > cast(1 as string) FROM t; + +SELECT cast(1 as decimal(3, 0)) > cast('1' as binary) FROM t; +SELECT cast(1 as decimal(5, 0)) > cast('1' as binary) FROM t; +SELECT cast(1 as decimal(10, 0)) > cast('1' as binary) FROM t; +SELECT cast(1 as decimal(20, 0)) > cast('1' as binary) FROM t; + +SELECT cast(1 as decimal(3, 0)) > cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(5, 0)) > cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(10, 0)) > cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(20, 0)) > cast(1 as boolean) FROM t; + +SELECT cast(1 as decimal(3, 0)) > cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(5, 0)) > cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(10, 0)) > cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(20, 0)) > cast('2017-12-11 09:30:00.0' as timestamp) FROM t; + +SELECT cast(1 as decimal(3, 0)) > cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(5, 0)) > cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(10, 0)) > cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(20, 0)) > cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as tinyint) >= cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as tinyint) >= cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as tinyint) >= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as tinyint) >= cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as smallint) >= cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as 
smallint) >= cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as smallint) >= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as smallint) >= cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as int) >= cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as int) >= cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as int) >= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as int) >= cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as bigint) >= cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as bigint) >= cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as bigint) >= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as bigint) >= cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as float) >= cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as float) >= cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as float) >= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as float) >= cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as double) >= cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as double) >= cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as double) >= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as double) >= cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(10, 0)) >= cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) >= cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) >= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) >= cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('1' as binary) >= cast(1 as decimal(3, 0)) FROM t; +SELECT cast('1' as binary) >= cast(1 as decimal(5, 0)) FROM t; +SELECT cast('1' as binary) >= cast(1 as decimal(10, 0)) FROM t; +SELECT cast('1' as binary) >= cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00.0' as timestamp) >= cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) >= cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) >= cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) >= cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00' as date) >= cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) >= cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) >= cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) >= cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) >= cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(5, 0)) >= cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(10, 0)) >= cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(20, 0)) >= cast(1 as tinyint) FROM t; + +SELECT cast(1 as decimal(3, 0)) >= cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(5, 0)) >= cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(10, 0)) >= cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(20, 0)) >= cast(1 as smallint) FROM t; + +SELECT cast(1 as decimal(3, 0)) >= cast(1 as int) FROM t; +SELECT cast(1 as decimal(5, 0)) >= cast(1 as int) FROM t; +SELECT cast(1 as decimal(10, 0)) >= cast(1 as int) FROM t; +SELECT cast(1 as decimal(20, 0)) >= cast(1 as int) FROM t; + +SELECT cast(1 as decimal(3, 0)) >= cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(5, 0)) >= cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(10, 0)) >= cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(20, 0)) >= cast(1 as bigint) FROM t; + +SELECT cast(1 as decimal(3, 0)) >= cast(1 as float) FROM t; +SELECT cast(1 as decimal(5, 0)) >= cast(1 as float) FROM t; +SELECT cast(1 as decimal(10, 0)) 
>= cast(1 as float) FROM t; +SELECT cast(1 as decimal(20, 0)) >= cast(1 as float) FROM t; + +SELECT cast(1 as decimal(3, 0)) >= cast(1 as double) FROM t; +SELECT cast(1 as decimal(5, 0)) >= cast(1 as double) FROM t; +SELECT cast(1 as decimal(10, 0)) >= cast(1 as double) FROM t; +SELECT cast(1 as decimal(20, 0)) >= cast(1 as double) FROM t; + +SELECT cast(1 as decimal(3, 0)) >= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(5, 0)) >= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) >= cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(20, 0)) >= cast(1 as decimal(10, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) >= cast(1 as string) FROM t; +SELECT cast(1 as decimal(5, 0)) >= cast(1 as string) FROM t; +SELECT cast(1 as decimal(10, 0)) >= cast(1 as string) FROM t; +SELECT cast(1 as decimal(20, 0)) >= cast(1 as string) FROM t; + +SELECT cast(1 as decimal(3, 0)) >= cast('1' as binary) FROM t; +SELECT cast(1 as decimal(5, 0)) >= cast('1' as binary) FROM t; +SELECT cast(1 as decimal(10, 0)) >= cast('1' as binary) FROM t; +SELECT cast(1 as decimal(20, 0)) >= cast('1' as binary) FROM t; + +SELECT cast(1 as decimal(3, 0)) >= cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(5, 0)) >= cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(10, 0)) >= cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(20, 0)) >= cast(1 as boolean) FROM t; + +SELECT cast(1 as decimal(3, 0)) >= cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(5, 0)) >= cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(10, 0)) >= cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(20, 0)) >= cast('2017-12-11 09:30:00.0' as timestamp) FROM t; + +SELECT cast(1 as decimal(3, 0)) >= cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(5, 0)) >= cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(10, 0)) >= cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(20, 0)) >= cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as tinyint) <> cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as tinyint) <> cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as tinyint) <> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as tinyint) <> cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as smallint) <> cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as smallint) <> cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as smallint) <> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as smallint) <> cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as int) <> cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as int) <> cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as int) <> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as int) <> cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as bigint) <> cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as bigint) <> cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as bigint) <> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as bigint) <> cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as float) <> cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as float) <> cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as float) <> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as float) <> cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as double) <> cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as double) <> cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as double) <> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as double) <> cast(1 as 
decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(10, 0)) <> cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) <> cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) <> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) <> cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('1' as binary) <> cast(1 as decimal(3, 0)) FROM t; +SELECT cast('1' as binary) <> cast(1 as decimal(5, 0)) FROM t; +SELECT cast('1' as binary) <> cast(1 as decimal(10, 0)) FROM t; +SELECT cast('1' as binary) <> cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <> cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <> cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <> cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <> cast(1 as decimal(20, 0)) FROM t; + +SELECT cast('2017-12-11 09:30:00' as date) <> cast(1 as decimal(3, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) <> cast(1 as decimal(5, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) <> cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) <> cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) <> cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(5, 0)) <> cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(10, 0)) <> cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(20, 0)) <> cast(1 as tinyint) FROM t; + +SELECT cast(1 as decimal(3, 0)) <> cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(5, 0)) <> cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(10, 0)) <> cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(20, 0)) <> cast(1 as smallint) FROM t; + +SELECT cast(1 as decimal(3, 0)) <> cast(1 as int) FROM t; +SELECT cast(1 as decimal(5, 0)) <> cast(1 as int) FROM t; +SELECT cast(1 as decimal(10, 0)) <> cast(1 as int) FROM t; +SELECT cast(1 as decimal(20, 0)) <> cast(1 as int) FROM t; + +SELECT cast(1 as decimal(3, 0)) <> cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(5, 0)) <> cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(10, 0)) <> cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(20, 0)) <> cast(1 as bigint) FROM t; + +SELECT cast(1 as decimal(3, 0)) <> cast(1 as float) FROM t; +SELECT cast(1 as decimal(5, 0)) <> cast(1 as float) FROM t; +SELECT cast(1 as decimal(10, 0)) <> cast(1 as float) FROM t; +SELECT cast(1 as decimal(20, 0)) <> cast(1 as float) FROM t; + +SELECT cast(1 as decimal(3, 0)) <> cast(1 as double) FROM t; +SELECT cast(1 as decimal(5, 0)) <> cast(1 as double) FROM t; +SELECT cast(1 as decimal(10, 0)) <> cast(1 as double) FROM t; +SELECT cast(1 as decimal(20, 0)) <> cast(1 as double) FROM t; + +SELECT cast(1 as decimal(3, 0)) <> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(5, 0)) <> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) <> cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(20, 0)) <> cast(1 as decimal(10, 0)) FROM t; + +SELECT cast(1 as decimal(3, 0)) <> cast(1 as string) FROM t; +SELECT cast(1 as decimal(5, 0)) <> cast(1 as string) FROM t; +SELECT cast(1 as decimal(10, 0)) <> cast(1 as string) FROM t; +SELECT cast(1 as decimal(20, 0)) <> cast(1 as string) FROM t; + +SELECT cast(1 as decimal(3, 0)) <> cast('1' as binary) FROM t; +SELECT cast(1 as decimal(5, 0)) <> cast('1' as binary) FROM t; +SELECT cast(1 as decimal(10, 0)) <> cast('1' as binary) FROM t; +SELECT cast(1 as decimal(20, 0)) <> cast('1' as binary) FROM t; + 
+SELECT cast(1 as decimal(3, 0)) <> cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(5, 0)) <> cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(10, 0)) <> cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(20, 0)) <> cast(1 as boolean) FROM t; + +SELECT cast(1 as decimal(3, 0)) <> cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(5, 0)) <> cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(10, 0)) <> cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(20, 0)) <> cast('2017-12-11 09:30:00.0' as timestamp) FROM t; + +SELECT cast(1 as decimal(3, 0)) <> cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(5, 0)) <> cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(10, 0)) <> cast('2017-12-11 09:30:00' as date) FROM t; +SELECT cast(1 as decimal(20, 0)) <> cast('2017-12-11 09:30:00' as date) FROM t; diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/division.sql b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/division.sql new file mode 100644 index 0000000000000..d669740ddd9ca --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/division.sql @@ -0,0 +1,174 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+-- + +CREATE TEMPORARY VIEW t AS SELECT 1; + +SELECT cast(1 as tinyint) / cast(1 as tinyint) FROM t; +SELECT cast(1 as tinyint) / cast(1 as smallint) FROM t; +SELECT cast(1 as tinyint) / cast(1 as int) FROM t; +SELECT cast(1 as tinyint) / cast(1 as bigint) FROM t; +SELECT cast(1 as tinyint) / cast(1 as float) FROM t; +SELECT cast(1 as tinyint) / cast(1 as double) FROM t; +SELECT cast(1 as tinyint) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as tinyint) / cast(1 as string) FROM t; +SELECT cast(1 as tinyint) / cast('1' as binary) FROM t; +SELECT cast(1 as tinyint) / cast(1 as boolean) FROM t; +SELECT cast(1 as tinyint) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as tinyint) / cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as smallint) / cast(1 as tinyint) FROM t; +SELECT cast(1 as smallint) / cast(1 as smallint) FROM t; +SELECT cast(1 as smallint) / cast(1 as int) FROM t; +SELECT cast(1 as smallint) / cast(1 as bigint) FROM t; +SELECT cast(1 as smallint) / cast(1 as float) FROM t; +SELECT cast(1 as smallint) / cast(1 as double) FROM t; +SELECT cast(1 as smallint) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as smallint) / cast(1 as string) FROM t; +SELECT cast(1 as smallint) / cast('1' as binary) FROM t; +SELECT cast(1 as smallint) / cast(1 as boolean) FROM t; +SELECT cast(1 as smallint) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as smallint) / cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as int) / cast(1 as tinyint) FROM t; +SELECT cast(1 as int) / cast(1 as smallint) FROM t; +SELECT cast(1 as int) / cast(1 as int) FROM t; +SELECT cast(1 as int) / cast(1 as bigint) FROM t; +SELECT cast(1 as int) / cast(1 as float) FROM t; +SELECT cast(1 as int) / cast(1 as double) FROM t; +SELECT cast(1 as int) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as int) / cast(1 as string) FROM t; +SELECT cast(1 as int) / cast('1' as binary) FROM t; +SELECT cast(1 as int) / cast(1 as boolean) FROM t; +SELECT cast(1 as int) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as int) / cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as bigint) / cast(1 as tinyint) FROM t; +SELECT cast(1 as bigint) / cast(1 as smallint) FROM t; +SELECT cast(1 as bigint) / cast(1 as int) FROM t; +SELECT cast(1 as bigint) / cast(1 as bigint) FROM t; +SELECT cast(1 as bigint) / cast(1 as float) FROM t; +SELECT cast(1 as bigint) / cast(1 as double) FROM t; +SELECT cast(1 as bigint) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as bigint) / cast(1 as string) FROM t; +SELECT cast(1 as bigint) / cast('1' as binary) FROM t; +SELECT cast(1 as bigint) / cast(1 as boolean) FROM t; +SELECT cast(1 as bigint) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as bigint) / cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as float) / cast(1 as tinyint) FROM t; +SELECT cast(1 as float) / cast(1 as smallint) FROM t; +SELECT cast(1 as float) / cast(1 as int) FROM t; +SELECT cast(1 as float) / cast(1 as bigint) FROM t; +SELECT cast(1 as float) / cast(1 as float) FROM t; +SELECT cast(1 as float) / cast(1 as double) FROM t; +SELECT cast(1 as float) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as float) / cast(1 as string) FROM t; +SELECT cast(1 as float) / cast('1' as binary) FROM t; +SELECT cast(1 as float) / cast(1 as boolean) FROM t; +SELECT cast(1 as float) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as float) / cast('2017-12-11 09:30:00' as date) FROM t; + 
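+-- The double, decimal, string, binary, boolean, timestamp, and date numerator blocks below follow the same template as the numeric blocks above. +-- A sketch of the coercion being exercised, inferred from division always producing a fractional result rather than from outputs recorded in this file: +-- integral / integral is widened to double, e.g. SELECT cast(1 as tinyint) / cast(1 as tinyint) should yield 1.0 as double, +-- while two decimal operands should keep a decimal result with adjusted precision and scale. +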
+SELECT cast(1 as double) / cast(1 as tinyint) FROM t; +SELECT cast(1 as double) / cast(1 as smallint) FROM t; +SELECT cast(1 as double) / cast(1 as int) FROM t; +SELECT cast(1 as double) / cast(1 as bigint) FROM t; +SELECT cast(1 as double) / cast(1 as float) FROM t; +SELECT cast(1 as double) / cast(1 as double) FROM t; +SELECT cast(1 as double) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as double) / cast(1 as string) FROM t; +SELECT cast(1 as double) / cast('1' as binary) FROM t; +SELECT cast(1 as double) / cast(1 as boolean) FROM t; +SELECT cast(1 as double) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as double) / cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as decimal(10, 0)) / cast(1 as tinyint) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast(1 as smallint) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast(1 as int) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast(1 as bigint) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast(1 as float) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast(1 as double) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast(1 as string) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast('1' as binary) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast(1 as boolean) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(10, 0)) / cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as string) / cast(1 as tinyint) FROM t; +SELECT cast(1 as string) / cast(1 as smallint) FROM t; +SELECT cast(1 as string) / cast(1 as int) FROM t; +SELECT cast(1 as string) / cast(1 as bigint) FROM t; +SELECT cast(1 as string) / cast(1 as float) FROM t; +SELECT cast(1 as string) / cast(1 as double) FROM t; +SELECT cast(1 as string) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as string) / cast(1 as string) FROM t; +SELECT cast(1 as string) / cast('1' as binary) FROM t; +SELECT cast(1 as string) / cast(1 as boolean) FROM t; +SELECT cast(1 as string) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as string) / cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast('1' as binary) / cast(1 as tinyint) FROM t; +SELECT cast('1' as binary) / cast(1 as smallint) FROM t; +SELECT cast('1' as binary) / cast(1 as int) FROM t; +SELECT cast('1' as binary) / cast(1 as bigint) FROM t; +SELECT cast('1' as binary) / cast(1 as float) FROM t; +SELECT cast('1' as binary) / cast(1 as double) FROM t; +SELECT cast('1' as binary) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast('1' as binary) / cast(1 as string) FROM t; +SELECT cast('1' as binary) / cast('1' as binary) FROM t; +SELECT cast('1' as binary) / cast(1 as boolean) FROM t; +SELECT cast('1' as binary) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast('1' as binary) / cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as boolean) / cast(1 as tinyint) FROM t; +SELECT cast(1 as boolean) / cast(1 as smallint) FROM t; +SELECT cast(1 as boolean) / cast(1 as int) FROM t; +SELECT cast(1 as boolean) / cast(1 as bigint) FROM t; +SELECT cast(1 as boolean) / cast(1 as float) FROM t; +SELECT cast(1 as boolean) / cast(1 as double) FROM t; +SELECT cast(1 as boolean) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as boolean) / cast(1 as string) FROM t; +SELECT cast(1 as boolean) / cast('1' as binary) FROM t; +SELECT cast(1 as boolean) / cast(1 as boolean) FROM t; +SELECT cast(1 as boolean) / cast('2017-12-11 09:30:00.0' as 
timestamp) FROM t; +SELECT cast(1 as boolean) / cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as tinyint) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as smallint) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as int) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as bigint) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as float) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as double) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as string) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast('1' as binary) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as boolean) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as tinyint) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as smallint) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as int) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as bigint) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as float) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as double) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as string) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) / cast('1' as binary) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as boolean) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast('2017-12-11 09:30:00' as date) / cast('2017-12-11 09:30:00' as date) FROM t; diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/elt.sql b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/elt.sql new file mode 100644 index 0000000000000..717616f91db05 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/elt.sql @@ -0,0 +1,44 @@ +-- Mixed inputs (output type is string) +SELECT elt(2, col1, col2, col3, col4, col5) col +FROM ( + SELECT + 'prefix_' col1, + id col2, + string(id + 1) col3, + encode(string(id + 2), 'utf-8') col4, + CAST(id AS DOUBLE) col5 + FROM range(10) +); + +SELECT elt(3, col1, col2, col3, col4) col +FROM ( + SELECT + string(id) col1, + string(id + 1) col2, + encode(string(id + 2), 'utf-8') col3, + encode(string(id + 3), 'utf-8') col4 + FROM range(10) +); + +-- turn on eltOutputAsString +set spark.sql.function.eltOutputAsString=true; + +SELECT elt(1, col1, col2) col +FROM ( + SELECT + encode(string(id), 'utf-8') col1, + encode(string(id + 1), 'utf-8') col2 + FROM range(10) +); + +-- turn off eltOutputAsString +set spark.sql.function.eltOutputAsString=false; + +-- Elt binary inputs (output type is binary) +SELECT elt(2, col1, col2) col +FROM ( + SELECT + encode(string(id), 'utf-8') col1, + encode(string(id + 1), 'utf-8') col2 + FROM range(10) +); diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/ifCoercion.sql b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/ifCoercion.sql new file mode 100644 index 0000000000000..42597f169daec --- /dev/null +++ 
b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/ifCoercion.sql @@ -0,0 +1,174 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +CREATE TEMPORARY VIEW t AS SELECT 1; + +SELECT IF(true, cast(1 as tinyint), cast(2 as tinyint)) FROM t; +SELECT IF(true, cast(1 as tinyint), cast(2 as smallint)) FROM t; +SELECT IF(true, cast(1 as tinyint), cast(2 as int)) FROM t; +SELECT IF(true, cast(1 as tinyint), cast(2 as bigint)) FROM t; +SELECT IF(true, cast(1 as tinyint), cast(2 as float)) FROM t; +SELECT IF(true, cast(1 as tinyint), cast(2 as double)) FROM t; +SELECT IF(true, cast(1 as tinyint), cast(2 as decimal(10, 0))) FROM t; +SELECT IF(true, cast(1 as tinyint), cast(2 as string)) FROM t; +SELECT IF(true, cast(1 as tinyint), cast('2' as binary)) FROM t; +SELECT IF(true, cast(1 as tinyint), cast(2 as boolean)) FROM t; +SELECT IF(true, cast(1 as tinyint), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT IF(true, cast(1 as tinyint), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT IF(true, cast(1 as smallint), cast(2 as tinyint)) FROM t; +SELECT IF(true, cast(1 as smallint), cast(2 as smallint)) FROM t; +SELECT IF(true, cast(1 as smallint), cast(2 as int)) FROM t; +SELECT IF(true, cast(1 as smallint), cast(2 as bigint)) FROM t; +SELECT IF(true, cast(1 as smallint), cast(2 as float)) FROM t; +SELECT IF(true, cast(1 as smallint), cast(2 as double)) FROM t; +SELECT IF(true, cast(1 as smallint), cast(2 as decimal(10, 0))) FROM t; +SELECT IF(true, cast(1 as smallint), cast(2 as string)) FROM t; +SELECT IF(true, cast(1 as smallint), cast('2' as binary)) FROM t; +SELECT IF(true, cast(1 as smallint), cast(2 as boolean)) FROM t; +SELECT IF(true, cast(1 as smallint), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT IF(true, cast(1 as smallint), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT IF(true, cast(1 as int), cast(2 as tinyint)) FROM t; +SELECT IF(true, cast(1 as int), cast(2 as smallint)) FROM t; +SELECT IF(true, cast(1 as int), cast(2 as int)) FROM t; +SELECT IF(true, cast(1 as int), cast(2 as bigint)) FROM t; +SELECT IF(true, cast(1 as int), cast(2 as float)) FROM t; +SELECT IF(true, cast(1 as int), cast(2 as double)) FROM t; +SELECT IF(true, cast(1 as int), cast(2 as decimal(10, 0))) FROM t; +SELECT IF(true, cast(1 as int), cast(2 as string)) FROM t; +SELECT IF(true, cast(1 as int), cast('2' as binary)) FROM t; +SELECT IF(true, cast(1 as int), cast(2 as boolean)) FROM t; +SELECT IF(true, cast(1 as int), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT IF(true, cast(1 as int), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT IF(true, cast(1 as bigint), cast(2 as tinyint)) FROM t; +SELECT IF(true, cast(1 as bigint), cast(2 as smallint)) FROM t; +SELECT IF(true, cast(1 as bigint), 
cast(2 as int)) FROM t; +SELECT IF(true, cast(1 as bigint), cast(2 as bigint)) FROM t; +SELECT IF(true, cast(1 as bigint), cast(2 as float)) FROM t; +SELECT IF(true, cast(1 as bigint), cast(2 as double)) FROM t; +SELECT IF(true, cast(1 as bigint), cast(2 as decimal(10, 0))) FROM t; +SELECT IF(true, cast(1 as bigint), cast(2 as string)) FROM t; +SELECT IF(true, cast(1 as bigint), cast('2' as binary)) FROM t; +SELECT IF(true, cast(1 as bigint), cast(2 as boolean)) FROM t; +SELECT IF(true, cast(1 as bigint), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT IF(true, cast(1 as bigint), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT IF(true, cast(1 as float), cast(2 as tinyint)) FROM t; +SELECT IF(true, cast(1 as float), cast(2 as smallint)) FROM t; +SELECT IF(true, cast(1 as float), cast(2 as int)) FROM t; +SELECT IF(true, cast(1 as float), cast(2 as bigint)) FROM t; +SELECT IF(true, cast(1 as float), cast(2 as float)) FROM t; +SELECT IF(true, cast(1 as float), cast(2 as double)) FROM t; +SELECT IF(true, cast(1 as float), cast(2 as decimal(10, 0))) FROM t; +SELECT IF(true, cast(1 as float), cast(2 as string)) FROM t; +SELECT IF(true, cast(1 as float), cast('2' as binary)) FROM t; +SELECT IF(true, cast(1 as float), cast(2 as boolean)) FROM t; +SELECT IF(true, cast(1 as float), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT IF(true, cast(1 as float), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT IF(true, cast(1 as double), cast(2 as tinyint)) FROM t; +SELECT IF(true, cast(1 as double), cast(2 as smallint)) FROM t; +SELECT IF(true, cast(1 as double), cast(2 as int)) FROM t; +SELECT IF(true, cast(1 as double), cast(2 as bigint)) FROM t; +SELECT IF(true, cast(1 as double), cast(2 as float)) FROM t; +SELECT IF(true, cast(1 as double), cast(2 as double)) FROM t; +SELECT IF(true, cast(1 as double), cast(2 as decimal(10, 0))) FROM t; +SELECT IF(true, cast(1 as double), cast(2 as string)) FROM t; +SELECT IF(true, cast(1 as double), cast('2' as binary)) FROM t; +SELECT IF(true, cast(1 as double), cast(2 as boolean)) FROM t; +SELECT IF(true, cast(1 as double), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT IF(true, cast(1 as double), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as tinyint)) FROM t; +SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as smallint)) FROM t; +SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as int)) FROM t; +SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as bigint)) FROM t; +SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as float)) FROM t; +SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as double)) FROM t; +SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as decimal(10, 0))) FROM t; +SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as string)) FROM t; +SELECT IF(true, cast(1 as decimal(10, 0)), cast('2' as binary)) FROM t; +SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as boolean)) FROM t; +SELECT IF(true, cast(1 as decimal(10, 0)), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT IF(true, cast(1 as decimal(10, 0)), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT IF(true, cast(1 as string), cast(2 as tinyint)) FROM t; +SELECT IF(true, cast(1 as string), cast(2 as smallint)) FROM t; +SELECT IF(true, cast(1 as string), cast(2 as int)) FROM t; +SELECT IF(true, cast(1 as string), cast(2 as bigint)) FROM t; +SELECT IF(true, cast(1 as string), cast(2 as float)) FROM t; +SELECT IF(true, cast(1 as string), cast(2 as double)) FROM t; +SELECT 
IF(true, cast(1 as string), cast(2 as decimal(10, 0))) FROM t; +SELECT IF(true, cast(1 as string), cast(2 as string)) FROM t; +SELECT IF(true, cast(1 as string), cast('2' as binary)) FROM t; +SELECT IF(true, cast(1 as string), cast(2 as boolean)) FROM t; +SELECT IF(true, cast(1 as string), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT IF(true, cast(1 as string), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT IF(true, cast('1' as binary), cast(2 as tinyint)) FROM t; +SELECT IF(true, cast('1' as binary), cast(2 as smallint)) FROM t; +SELECT IF(true, cast('1' as binary), cast(2 as int)) FROM t; +SELECT IF(true, cast('1' as binary), cast(2 as bigint)) FROM t; +SELECT IF(true, cast('1' as binary), cast(2 as float)) FROM t; +SELECT IF(true, cast('1' as binary), cast(2 as double)) FROM t; +SELECT IF(true, cast('1' as binary), cast(2 as decimal(10, 0))) FROM t; +SELECT IF(true, cast('1' as binary), cast(2 as string)) FROM t; +SELECT IF(true, cast('1' as binary), cast('2' as binary)) FROM t; +SELECT IF(true, cast('1' as binary), cast(2 as boolean)) FROM t; +SELECT IF(true, cast('1' as binary), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT IF(true, cast('1' as binary), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT IF(true, cast(1 as boolean), cast(2 as tinyint)) FROM t; +SELECT IF(true, cast(1 as boolean), cast(2 as smallint)) FROM t; +SELECT IF(true, cast(1 as boolean), cast(2 as int)) FROM t; +SELECT IF(true, cast(1 as boolean), cast(2 as bigint)) FROM t; +SELECT IF(true, cast(1 as boolean), cast(2 as float)) FROM t; +SELECT IF(true, cast(1 as boolean), cast(2 as double)) FROM t; +SELECT IF(true, cast(1 as boolean), cast(2 as decimal(10, 0))) FROM t; +SELECT IF(true, cast(1 as boolean), cast(2 as string)) FROM t; +SELECT IF(true, cast(1 as boolean), cast('2' as binary)) FROM t; +SELECT IF(true, cast(1 as boolean), cast(2 as boolean)) FROM t; +SELECT IF(true, cast(1 as boolean), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT IF(true, cast(1 as boolean), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as tinyint)) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as smallint)) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as int)) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as bigint)) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as float)) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as double)) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as decimal(10, 0))) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as string)) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast('2' as binary)) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as boolean)) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as tinyint)) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as smallint)) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as int)) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as bigint)) FROM t; +SELECT IF(true, cast('2017-12-12 
09:30:00' as date), cast(2 as float)) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as double)) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as decimal(10, 0))) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as string)) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast('2' as binary)) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as boolean)) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast('2017-12-11 09:30:00' as date)) FROM t; diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/implicitTypeCasts.sql b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/implicitTypeCasts.sql new file mode 100644 index 0000000000000..6de22b8b7c3de --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/implicitTypeCasts.sql @@ -0,0 +1,72 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+--
+
+-- ImplicitTypeCasts
+
+CREATE TEMPORARY VIEW t AS SELECT 1;
+
+-- binary arithmetic: the string operand is implicitly cast to double
+SELECT 1 + '2' FROM t;
+SELECT 1 - '2' FROM t;
+SELECT 1 * '2' FROM t;
+SELECT 4 / '2' FROM t;
+SELECT 1.1 + '2' FROM t;
+SELECT 1.1 - '2' FROM t;
+SELECT 1.1 * '2' FROM t;
+SELECT 4.4 / '2' FROM t;
+SELECT 1.1 + '2.2' FROM t;
+SELECT 1.1 - '2.2' FROM t;
+SELECT 1.1 * '2.2' FROM t;
+SELECT 4.4 / '2.2' FROM t;
+
+-- concatenation: non-string operands are implicitly cast to string
+SELECT '$' || cast(1 as smallint) || '$' FROM t;
+SELECT '$' || 1 || '$' FROM t;
+SELECT '$' || cast(1 as bigint) || '$' FROM t;
+SELECT '$' || cast(1.1 as float) || '$' FROM t;
+SELECT '$' || cast(1.1 as double) || '$' FROM t;
+SELECT '$' || 1.1 || '$' FROM t;
+SELECT '$' || cast(1.1 as decimal(8,3)) || '$' FROM t;
+SELECT '$' || 'abcd' || '$' FROM t;
+SELECT '$' || date('1996-09-09') || '$' FROM t;
+SELECT '$' || timestamp('1996-09-09 10:11:12.4') || '$' FROM t;
+
+-- length functions
+SELECT length(cast(1 as smallint)) FROM t;
+SELECT length(cast(1 as int)) FROM t;
+SELECT length(cast(1 as bigint)) FROM t;
+SELECT length(cast(1.1 as float)) FROM t;
+SELECT length(cast(1.1 as double)) FROM t;
+SELECT length(1.1) FROM t;
+SELECT length(cast(1.1 as decimal(8,3))) FROM t;
+SELECT length('four') FROM t;
+SELECT length(date('1996-09-10')) FROM t;
+SELECT length(timestamp('1996-09-10 10:11:12.4')) FROM t;
+
+-- extract fields from date/time strings
+SELECT year('1996-01-10') FROM t;
+SELECT month('1996-01-10') FROM t;
+SELECT day('1996-01-10') FROM t;
+SELECT hour('10:11:12') FROM t;
+SELECT minute('10:11:12') FROM t;
+SELECT second('10:11:12') FROM t;
+
+-- like
+SELECT 1 like '%' FROM t;
+SELECT date('1996-09-10') like '19%' FROM t;
+SELECT '1' like 1 FROM t;
+SELECT '1 ' like 1 FROM t;
+SELECT '1996-09-10' like date('1996-09-10') FROM t;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/inConversion.sql b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/inConversion.sql
new file mode 100644
index 0000000000000..39dbe7268fba0
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/inConversion.sql
@@ -0,0 +1,330 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- + +CREATE TEMPORARY VIEW t AS SELECT 1; + +SELECT cast(1 as tinyint) in (cast(1 as tinyint)) FROM t; +SELECT cast(1 as tinyint) in (cast(1 as smallint)) FROM t; +SELECT cast(1 as tinyint) in (cast(1 as int)) FROM t; +SELECT cast(1 as tinyint) in (cast(1 as bigint)) FROM t; +SELECT cast(1 as tinyint) in (cast(1 as float)) FROM t; +SELECT cast(1 as tinyint) in (cast(1 as double)) FROM t; +SELECT cast(1 as tinyint) in (cast(1 as decimal(10, 0))) FROM t; +SELECT cast(1 as tinyint) in (cast(1 as string)) FROM t; +SELECT cast(1 as tinyint) in (cast('1' as binary)) FROM t; +SELECT cast(1 as tinyint) in (cast(1 as boolean)) FROM t; +SELECT cast(1 as tinyint) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast(1 as tinyint) in (cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast(1 as smallint) in (cast(1 as tinyint)) FROM t; +SELECT cast(1 as smallint) in (cast(1 as smallint)) FROM t; +SELECT cast(1 as smallint) in (cast(1 as int)) FROM t; +SELECT cast(1 as smallint) in (cast(1 as bigint)) FROM t; +SELECT cast(1 as smallint) in (cast(1 as float)) FROM t; +SELECT cast(1 as smallint) in (cast(1 as double)) FROM t; +SELECT cast(1 as smallint) in (cast(1 as decimal(10, 0))) FROM t; +SELECT cast(1 as smallint) in (cast(1 as string)) FROM t; +SELECT cast(1 as smallint) in (cast('1' as binary)) FROM t; +SELECT cast(1 as smallint) in (cast(1 as boolean)) FROM t; +SELECT cast(1 as smallint) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast(1 as smallint) in (cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast(1 as int) in (cast(1 as tinyint)) FROM t; +SELECT cast(1 as int) in (cast(1 as smallint)) FROM t; +SELECT cast(1 as int) in (cast(1 as int)) FROM t; +SELECT cast(1 as int) in (cast(1 as bigint)) FROM t; +SELECT cast(1 as int) in (cast(1 as float)) FROM t; +SELECT cast(1 as int) in (cast(1 as double)) FROM t; +SELECT cast(1 as int) in (cast(1 as decimal(10, 0))) FROM t; +SELECT cast(1 as int) in (cast(1 as string)) FROM t; +SELECT cast(1 as int) in (cast('1' as binary)) FROM t; +SELECT cast(1 as int) in (cast(1 as boolean)) FROM t; +SELECT cast(1 as int) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast(1 as int) in (cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast(1 as bigint) in (cast(1 as tinyint)) FROM t; +SELECT cast(1 as bigint) in (cast(1 as smallint)) FROM t; +SELECT cast(1 as bigint) in (cast(1 as int)) FROM t; +SELECT cast(1 as bigint) in (cast(1 as bigint)) FROM t; +SELECT cast(1 as bigint) in (cast(1 as float)) FROM t; +SELECT cast(1 as bigint) in (cast(1 as double)) FROM t; +SELECT cast(1 as bigint) in (cast(1 as decimal(10, 0))) FROM t; +SELECT cast(1 as bigint) in (cast(1 as string)) FROM t; +SELECT cast(1 as bigint) in (cast('1' as binary)) FROM t; +SELECT cast(1 as bigint) in (cast(1 as boolean)) FROM t; +SELECT cast(1 as bigint) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast(1 as bigint) in (cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast(1 as float) in (cast(1 as tinyint)) FROM t; +SELECT cast(1 as float) in (cast(1 as smallint)) FROM t; +SELECT cast(1 as float) in (cast(1 as int)) FROM t; +SELECT cast(1 as float) in (cast(1 as bigint)) FROM t; +SELECT cast(1 as float) in (cast(1 as float)) FROM t; +SELECT cast(1 as float) in (cast(1 as double)) FROM t; +SELECT cast(1 as float) in (cast(1 as decimal(10, 0))) FROM t; +SELECT cast(1 as float) in (cast(1 as string)) FROM t; +SELECT cast(1 as float) in (cast('1' as binary)) FROM t; +SELECT cast(1 as float) in (cast(1 as 
boolean)) FROM t; +SELECT cast(1 as float) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast(1 as float) in (cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast(1 as double) in (cast(1 as tinyint)) FROM t; +SELECT cast(1 as double) in (cast(1 as smallint)) FROM t; +SELECT cast(1 as double) in (cast(1 as int)) FROM t; +SELECT cast(1 as double) in (cast(1 as bigint)) FROM t; +SELECT cast(1 as double) in (cast(1 as float)) FROM t; +SELECT cast(1 as double) in (cast(1 as double)) FROM t; +SELECT cast(1 as double) in (cast(1 as decimal(10, 0))) FROM t; +SELECT cast(1 as double) in (cast(1 as string)) FROM t; +SELECT cast(1 as double) in (cast('1' as binary)) FROM t; +SELECT cast(1 as double) in (cast(1 as boolean)) FROM t; +SELECT cast(1 as double) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast(1 as double) in (cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast(1 as decimal(10, 0)) in (cast(1 as tinyint)) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast(1 as smallint)) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast(1 as int)) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast(1 as bigint)) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast(1 as float)) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast(1 as double)) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0))) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast(1 as string)) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast('1' as binary)) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast(1 as boolean)) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast(1 as string) in (cast(1 as tinyint)) FROM t; +SELECT cast(1 as string) in (cast(1 as smallint)) FROM t; +SELECT cast(1 as string) in (cast(1 as int)) FROM t; +SELECT cast(1 as string) in (cast(1 as bigint)) FROM t; +SELECT cast(1 as string) in (cast(1 as float)) FROM t; +SELECT cast(1 as string) in (cast(1 as double)) FROM t; +SELECT cast(1 as string) in (cast(1 as decimal(10, 0))) FROM t; +SELECT cast(1 as string) in (cast(1 as string)) FROM t; +SELECT cast(1 as string) in (cast('1' as binary)) FROM t; +SELECT cast(1 as string) in (cast(1 as boolean)) FROM t; +SELECT cast(1 as string) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast(1 as string) in (cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast('1' as binary) in (cast(1 as tinyint)) FROM t; +SELECT cast('1' as binary) in (cast(1 as smallint)) FROM t; +SELECT cast('1' as binary) in (cast(1 as int)) FROM t; +SELECT cast('1' as binary) in (cast(1 as bigint)) FROM t; +SELECT cast('1' as binary) in (cast(1 as float)) FROM t; +SELECT cast('1' as binary) in (cast(1 as double)) FROM t; +SELECT cast('1' as binary) in (cast(1 as decimal(10, 0))) FROM t; +SELECT cast('1' as binary) in (cast(1 as string)) FROM t; +SELECT cast('1' as binary) in (cast('1' as binary)) FROM t; +SELECT cast('1' as binary) in (cast(1 as boolean)) FROM t; +SELECT cast('1' as binary) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast('1' as binary) in (cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT true in (cast(1 as tinyint)) FROM t; +SELECT true in (cast(1 as smallint)) FROM t; +SELECT true in (cast(1 as int)) FROM t; +SELECT true in (cast(1 as bigint)) FROM t; +SELECT true in (cast(1 as float)) FROM t; +SELECT true in (cast(1 as double)) FROM t; +SELECT true in (cast(1 as decimal(10, 
0))) FROM t; +SELECT true in (cast(1 as string)) FROM t; +SELECT true in (cast('1' as binary)) FROM t; +SELECT true in (cast(1 as boolean)) FROM t; +SELECT true in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT true in (cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as tinyint)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as smallint)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as int)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as bigint)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as float)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as double)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as decimal(10, 0))) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as string)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2' as binary)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as boolean)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as tinyint)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as smallint)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as int)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as bigint)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as float)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as double)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as decimal(10, 0))) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as string)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2' as binary)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as boolean)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as tinyint)) FROM t; +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as smallint)) FROM t; +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as int)) FROM t; +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as bigint)) FROM t; +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as float)) FROM t; +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as double)) FROM t; +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as decimal(10, 0))) FROM t; +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as string)) FROM t; +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast('1' as binary)) FROM t; +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as boolean)) FROM t; +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as tinyint)) FROM t; +SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as smallint)) FROM t; +SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as int)) FROM t; +SELECT cast(1 as smallint) in (cast(1 as 
smallint), cast(1 as bigint)) FROM t; +SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as float)) FROM t; +SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as double)) FROM t; +SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as decimal(10, 0))) FROM t; +SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as string)) FROM t; +SELECT cast(1 as smallint) in (cast(1 as smallint), cast('1' as binary)) FROM t; +SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as boolean)) FROM t; +SELECT cast(1 as smallint) in (cast(1 as smallint), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast(1 as smallint) in (cast(1 as smallint), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast(1 as int) in (cast(1 as int), cast(1 as tinyint)) FROM t; +SELECT cast(1 as int) in (cast(1 as int), cast(1 as smallint)) FROM t; +SELECT cast(1 as int) in (cast(1 as int), cast(1 as int)) FROM t; +SELECT cast(1 as int) in (cast(1 as int), cast(1 as bigint)) FROM t; +SELECT cast(1 as int) in (cast(1 as int), cast(1 as float)) FROM t; +SELECT cast(1 as int) in (cast(1 as int), cast(1 as double)) FROM t; +SELECT cast(1 as int) in (cast(1 as int), cast(1 as decimal(10, 0))) FROM t; +SELECT cast(1 as int) in (cast(1 as int), cast(1 as string)) FROM t; +SELECT cast(1 as int) in (cast(1 as int), cast('1' as binary)) FROM t; +SELECT cast(1 as int) in (cast(1 as int), cast(1 as boolean)) FROM t; +SELECT cast(1 as int) in (cast(1 as int), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast(1 as int) in (cast(1 as int), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as tinyint)) FROM t; +SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as smallint)) FROM t; +SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as int)) FROM t; +SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as bigint)) FROM t; +SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as float)) FROM t; +SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as double)) FROM t; +SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as decimal(10, 0))) FROM t; +SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as string)) FROM t; +SELECT cast(1 as bigint) in (cast(1 as bigint), cast('1' as binary)) FROM t; +SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as boolean)) FROM t; +SELECT cast(1 as bigint) in (cast(1 as bigint), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast(1 as bigint) in (cast(1 as bigint), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast(1 as float) in (cast(1 as float), cast(1 as tinyint)) FROM t; +SELECT cast(1 as float) in (cast(1 as float), cast(1 as smallint)) FROM t; +SELECT cast(1 as float) in (cast(1 as float), cast(1 as int)) FROM t; +SELECT cast(1 as float) in (cast(1 as float), cast(1 as bigint)) FROM t; +SELECT cast(1 as float) in (cast(1 as float), cast(1 as float)) FROM t; +SELECT cast(1 as float) in (cast(1 as float), cast(1 as double)) FROM t; +SELECT cast(1 as float) in (cast(1 as float), cast(1 as decimal(10, 0))) FROM t; +SELECT cast(1 as float) in (cast(1 as float), cast(1 as string)) FROM t; +SELECT cast(1 as float) in (cast(1 as float), cast('1' as binary)) FROM t; +SELECT cast(1 as float) in (cast(1 as float), cast(1 as boolean)) FROM t; +SELECT cast(1 as float) in (cast(1 as float), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast(1 as float) in (cast(1 as float), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast(1 
as double) in (cast(1 as double), cast(1 as tinyint)) FROM t; +SELECT cast(1 as double) in (cast(1 as double), cast(1 as smallint)) FROM t; +SELECT cast(1 as double) in (cast(1 as double), cast(1 as int)) FROM t; +SELECT cast(1 as double) in (cast(1 as double), cast(1 as bigint)) FROM t; +SELECT cast(1 as double) in (cast(1 as double), cast(1 as float)) FROM t; +SELECT cast(1 as double) in (cast(1 as double), cast(1 as double)) FROM t; +SELECT cast(1 as double) in (cast(1 as double), cast(1 as decimal(10, 0))) FROM t; +SELECT cast(1 as double) in (cast(1 as double), cast(1 as string)) FROM t; +SELECT cast(1 as double) in (cast(1 as double), cast('1' as binary)) FROM t; +SELECT cast(1 as double) in (cast(1 as double), cast(1 as boolean)) FROM t; +SELECT cast(1 as double) in (cast(1 as double), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast(1 as double) in (cast(1 as double), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as tinyint)) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as smallint)) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as int)) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as bigint)) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as float)) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as double)) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as decimal(10, 0))) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as string)) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast('1' as binary)) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as boolean)) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast(1 as string) in (cast(1 as string), cast(1 as tinyint)) FROM t; +SELECT cast(1 as string) in (cast(1 as string), cast(1 as smallint)) FROM t; +SELECT cast(1 as string) in (cast(1 as string), cast(1 as int)) FROM t; +SELECT cast(1 as string) in (cast(1 as string), cast(1 as bigint)) FROM t; +SELECT cast(1 as string) in (cast(1 as string), cast(1 as float)) FROM t; +SELECT cast(1 as string) in (cast(1 as string), cast(1 as double)) FROM t; +SELECT cast(1 as string) in (cast(1 as string), cast(1 as decimal(10, 0))) FROM t; +SELECT cast(1 as string) in (cast(1 as string), cast(1 as string)) FROM t; +SELECT cast(1 as string) in (cast(1 as string), cast('1' as binary)) FROM t; +SELECT cast(1 as string) in (cast(1 as string), cast(1 as boolean)) FROM t; +SELECT cast(1 as string) in (cast(1 as string), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast(1 as string) in (cast(1 as string), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as tinyint)) FROM t; +SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as smallint)) FROM t; +SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as int)) FROM t; +SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as bigint)) FROM t; +SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as float)) FROM t; +SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as double)) FROM 
t; +SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as decimal(10, 0))) FROM t; +SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as string)) FROM t; +SELECT cast('1' as binary) in (cast('1' as binary), cast('1' as binary)) FROM t; +SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as boolean)) FROM t; +SELECT cast('1' as binary) in (cast('1' as binary), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast('1' as binary) in (cast('1' as binary), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as tinyint)) FROM t; +SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as smallint)) FROM t; +SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as int)) FROM t; +SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as bigint)) FROM t; +SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as float)) FROM t; +SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as double)) FROM t; +SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as decimal(10, 0))) FROM t; +SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as string)) FROM t; +SELECT cast('1' as boolean) in (cast('1' as boolean), cast('1' as binary)) FROM t; +SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as boolean)) FROM t; +SELECT cast('1' as boolean) in (cast('1' as boolean), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast('1' as boolean) in (cast('1' as boolean), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as tinyint)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as smallint)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as int)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as bigint)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as float)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as double)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as decimal(10, 0))) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as string)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast('1' as binary)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as boolean)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as tinyint)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as smallint)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as int)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 
09:30:00' as date), cast(1 as bigint)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as float)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as double)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as decimal(10, 0))) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as string)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast('1' as binary)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as boolean)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast('2017-12-11 09:30:00' as date)) FROM t; diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/promoteStrings.sql b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/promoteStrings.sql new file mode 100644 index 0000000000000..a5603a184578d --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/promoteStrings.sql @@ -0,0 +1,364 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+-- + +CREATE TEMPORARY VIEW t AS SELECT 1; + +-- Binary arithmetic +SELECT '1' + cast(1 as tinyint) FROM t; +SELECT '1' + cast(1 as smallint) FROM t; +SELECT '1' + cast(1 as int) FROM t; +SELECT '1' + cast(1 as bigint) FROM t; +SELECT '1' + cast(1 as float) FROM t; +SELECT '1' + cast(1 as double) FROM t; +SELECT '1' + cast(1 as decimal(10, 0)) FROM t; +SELECT '1' + '1' FROM t; +SELECT '1' + cast('1' as binary) FROM t; +SELECT '1' + cast(1 as boolean) FROM t; +SELECT '1' + cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT '1' + cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT '1' - cast(1 as tinyint) FROM t; +SELECT '1' - cast(1 as smallint) FROM t; +SELECT '1' - cast(1 as int) FROM t; +SELECT '1' - cast(1 as bigint) FROM t; +SELECT '1' - cast(1 as float) FROM t; +SELECT '1' - cast(1 as double) FROM t; +SELECT '1' - cast(1 as decimal(10, 0)) FROM t; +SELECT '1' - '1' FROM t; +SELECT '1' - cast('1' as binary) FROM t; +SELECT '1' - cast(1 as boolean) FROM t; +SELECT '1' - cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT '1' - cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT '1' * cast(1 as tinyint) FROM t; +SELECT '1' * cast(1 as smallint) FROM t; +SELECT '1' * cast(1 as int) FROM t; +SELECT '1' * cast(1 as bigint) FROM t; +SELECT '1' * cast(1 as float) FROM t; +SELECT '1' * cast(1 as double) FROM t; +SELECT '1' * cast(1 as decimal(10, 0)) FROM t; +SELECT '1' * '1' FROM t; +SELECT '1' * cast('1' as binary) FROM t; +SELECT '1' * cast(1 as boolean) FROM t; +SELECT '1' * cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT '1' * cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT '1' / cast(1 as tinyint) FROM t; +SELECT '1' / cast(1 as smallint) FROM t; +SELECT '1' / cast(1 as int) FROM t; +SELECT '1' / cast(1 as bigint) FROM t; +SELECT '1' / cast(1 as float) FROM t; +SELECT '1' / cast(1 as double) FROM t; +SELECT '1' / cast(1 as decimal(10, 0)) FROM t; +SELECT '1' / '1' FROM t; +SELECT '1' / cast('1' as binary) FROM t; +SELECT '1' / cast(1 as boolean) FROM t; +SELECT '1' / cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT '1' / cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT '1' % cast(1 as tinyint) FROM t; +SELECT '1' % cast(1 as smallint) FROM t; +SELECT '1' % cast(1 as int) FROM t; +SELECT '1' % cast(1 as bigint) FROM t; +SELECT '1' % cast(1 as float) FROM t; +SELECT '1' % cast(1 as double) FROM t; +SELECT '1' % cast(1 as decimal(10, 0)) FROM t; +SELECT '1' % '1' FROM t; +SELECT '1' % cast('1' as binary) FROM t; +SELECT '1' % cast(1 as boolean) FROM t; +SELECT '1' % cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT '1' % cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT pmod('1', cast(1 as tinyint)) FROM t; +SELECT pmod('1', cast(1 as smallint)) FROM t; +SELECT pmod('1', cast(1 as int)) FROM t; +SELECT pmod('1', cast(1 as bigint)) FROM t; +SELECT pmod('1', cast(1 as float)) FROM t; +SELECT pmod('1', cast(1 as double)) FROM t; +SELECT pmod('1', cast(1 as decimal(10, 0))) FROM t; +SELECT pmod('1', '1') FROM t; +SELECT pmod('1', cast('1' as binary)) FROM t; +SELECT pmod('1', cast(1 as boolean)) FROM t; +SELECT pmod('1', cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT pmod('1', cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT cast(1 as tinyint) + '1' FROM t; +SELECT cast(1 as smallint) + '1' FROM t; +SELECT cast(1 as int) + '1' FROM t; +SELECT cast(1 as bigint) + '1' FROM t; +SELECT cast(1 as float) + '1' FROM t; +SELECT cast(1 as double) + '1' FROM t; +SELECT cast(1 as decimal(10, 0)) + '1' FROM t; +SELECT 
cast('1' as binary) + '1' FROM t; +SELECT cast(1 as boolean) + '1' FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) + '1' FROM t; +SELECT cast('2017-12-11 09:30:00' as date) + '1' FROM t; + +SELECT cast(1 as tinyint) - '1' FROM t; +SELECT cast(1 as smallint) - '1' FROM t; +SELECT cast(1 as int) - '1' FROM t; +SELECT cast(1 as bigint) - '1' FROM t; +SELECT cast(1 as float) - '1' FROM t; +SELECT cast(1 as double) - '1' FROM t; +SELECT cast(1 as decimal(10, 0)) - '1' FROM t; +SELECT cast('1' as binary) - '1' FROM t; +SELECT cast(1 as boolean) - '1' FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) - '1' FROM t; +SELECT cast('2017-12-11 09:30:00' as date) - '1' FROM t; + +SELECT cast(1 as tinyint) * '1' FROM t; +SELECT cast(1 as smallint) * '1' FROM t; +SELECT cast(1 as int) * '1' FROM t; +SELECT cast(1 as bigint) * '1' FROM t; +SELECT cast(1 as float) * '1' FROM t; +SELECT cast(1 as double) * '1' FROM t; +SELECT cast(1 as decimal(10, 0)) * '1' FROM t; +SELECT cast('1' as binary) * '1' FROM t; +SELECT cast(1 as boolean) * '1' FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) * '1' FROM t; +SELECT cast('2017-12-11 09:30:00' as date) * '1' FROM t; + +SELECT cast(1 as tinyint) / '1' FROM t; +SELECT cast(1 as smallint) / '1' FROM t; +SELECT cast(1 as int) / '1' FROM t; +SELECT cast(1 as bigint) / '1' FROM t; +SELECT cast(1 as float) / '1' FROM t; +SELECT cast(1 as double) / '1' FROM t; +SELECT cast(1 as decimal(10, 0)) / '1' FROM t; +SELECT cast('1' as binary) / '1' FROM t; +SELECT cast(1 as boolean) / '1' FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / '1' FROM t; +SELECT cast('2017-12-11 09:30:00' as date) / '1' FROM t; + +SELECT cast(1 as tinyint) % '1' FROM t; +SELECT cast(1 as smallint) % '1' FROM t; +SELECT cast(1 as int) % '1' FROM t; +SELECT cast(1 as bigint) % '1' FROM t; +SELECT cast(1 as float) % '1' FROM t; +SELECT cast(1 as double) % '1' FROM t; +SELECT cast(1 as decimal(10, 0)) % '1' FROM t; +SELECT cast('1' as binary) % '1' FROM t; +SELECT cast(1 as boolean) % '1' FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) % '1' FROM t; +SELECT cast('2017-12-11 09:30:00' as date) % '1' FROM t; + +SELECT pmod(cast(1 as tinyint), '1') FROM t; +SELECT pmod(cast(1 as smallint), '1') FROM t; +SELECT pmod(cast(1 as int), '1') FROM t; +SELECT pmod(cast(1 as bigint), '1') FROM t; +SELECT pmod(cast(1 as float), '1') FROM t; +SELECT pmod(cast(1 as double), '1') FROM t; +SELECT pmod(cast(1 as decimal(10, 0)), '1') FROM t; +SELECT pmod(cast('1' as binary), '1') FROM t; +SELECT pmod(cast(1 as boolean), '1') FROM t; +SELECT pmod(cast('2017-12-11 09:30:00.0' as timestamp), '1') FROM t; +SELECT pmod(cast('2017-12-11 09:30:00' as date), '1') FROM t; + +-- Equality +SELECT '1' = cast(1 as tinyint) FROM t; +SELECT '1' = cast(1 as smallint) FROM t; +SELECT '1' = cast(1 as int) FROM t; +SELECT '1' = cast(1 as bigint) FROM t; +SELECT '1' = cast(1 as float) FROM t; +SELECT '1' = cast(1 as double) FROM t; +SELECT '1' = cast(1 as decimal(10, 0)) FROM t; +SELECT '1' = '1' FROM t; +SELECT '1' = cast('1' as binary) FROM t; +SELECT '1' = cast(1 as boolean) FROM t; +SELECT '1' = cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT '1' = cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as tinyint) = '1' FROM t; +SELECT cast(1 as smallint) = '1' FROM t; +SELECT cast(1 as int) = '1' FROM t; +SELECT cast(1 as bigint) = '1' FROM t; +SELECT cast(1 as float) = '1' FROM t; +SELECT cast(1 as double) = '1' FROM t; +SELECT cast(1 as decimal(10, 0)) = '1' FROM t; 
+SELECT cast('1' as binary) = '1' FROM t; +SELECT cast(1 as boolean) = '1' FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) = '1' FROM t; +SELECT cast('2017-12-11 09:30:00' as date) = '1' FROM t; + +SELECT '1' <=> cast(1 as tinyint) FROM t; +SELECT '1' <=> cast(1 as smallint) FROM t; +SELECT '1' <=> cast(1 as int) FROM t; +SELECT '1' <=> cast(1 as bigint) FROM t; +SELECT '1' <=> cast(1 as float) FROM t; +SELECT '1' <=> cast(1 as double) FROM t; +SELECT '1' <=> cast(1 as decimal(10, 0)) FROM t; +SELECT '1' <=> '1' FROM t; +SELECT '1' <=> cast('1' as binary) FROM t; +SELECT '1' <=> cast(1 as boolean) FROM t; +SELECT '1' <=> cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT '1' <=> cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as tinyint) <=> '1' FROM t; +SELECT cast(1 as smallint) <=> '1' FROM t; +SELECT cast(1 as int) <=> '1' FROM t; +SELECT cast(1 as bigint) <=> '1' FROM t; +SELECT cast(1 as float) <=> '1' FROM t; +SELECT cast(1 as double) <=> '1' FROM t; +SELECT cast(1 as decimal(10, 0)) <=> '1' FROM t; +SELECT cast('1' as binary) <=> '1' FROM t; +SELECT cast(1 as boolean) <=> '1' FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <=> '1' FROM t; +SELECT cast('2017-12-11 09:30:00' as date) <=> '1' FROM t; + +-- Binary comparison +SELECT '1' < cast(1 as tinyint) FROM t; +SELECT '1' < cast(1 as smallint) FROM t; +SELECT '1' < cast(1 as int) FROM t; +SELECT '1' < cast(1 as bigint) FROM t; +SELECT '1' < cast(1 as float) FROM t; +SELECT '1' < cast(1 as double) FROM t; +SELECT '1' < cast(1 as decimal(10, 0)) FROM t; +SELECT '1' < '1' FROM t; +SELECT '1' < cast('1' as binary) FROM t; +SELECT '1' < cast(1 as boolean) FROM t; +SELECT '1' < cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT '1' < cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT '1' <= cast(1 as tinyint) FROM t; +SELECT '1' <= cast(1 as smallint) FROM t; +SELECT '1' <= cast(1 as int) FROM t; +SELECT '1' <= cast(1 as bigint) FROM t; +SELECT '1' <= cast(1 as float) FROM t; +SELECT '1' <= cast(1 as double) FROM t; +SELECT '1' <= cast(1 as decimal(10, 0)) FROM t; +SELECT '1' <= '1' FROM t; +SELECT '1' <= cast('1' as binary) FROM t; +SELECT '1' <= cast(1 as boolean) FROM t; +SELECT '1' <= cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT '1' <= cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT '1' > cast(1 as tinyint) FROM t; +SELECT '1' > cast(1 as smallint) FROM t; +SELECT '1' > cast(1 as int) FROM t; +SELECT '1' > cast(1 as bigint) FROM t; +SELECT '1' > cast(1 as float) FROM t; +SELECT '1' > cast(1 as double) FROM t; +SELECT '1' > cast(1 as decimal(10, 0)) FROM t; +SELECT '1' > '1' FROM t; +SELECT '1' > cast('1' as binary) FROM t; +SELECT '1' > cast(1 as boolean) FROM t; +SELECT '1' > cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT '1' > cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT '1' >= cast(1 as tinyint) FROM t; +SELECT '1' >= cast(1 as smallint) FROM t; +SELECT '1' >= cast(1 as int) FROM t; +SELECT '1' >= cast(1 as bigint) FROM t; +SELECT '1' >= cast(1 as float) FROM t; +SELECT '1' >= cast(1 as double) FROM t; +SELECT '1' >= cast(1 as decimal(10, 0)) FROM t; +SELECT '1' >= '1' FROM t; +SELECT '1' >= cast('1' as binary) FROM t; +SELECT '1' >= cast(1 as boolean) FROM t; +SELECT '1' >= cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT '1' >= cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT '1' <> cast(1 as tinyint) FROM t; +SELECT '1' <> cast(1 as smallint) FROM t; +SELECT '1' <> cast(1 as int) FROM t; +SELECT '1' <> cast(1 as 
bigint) FROM t; +SELECT '1' <> cast(1 as float) FROM t; +SELECT '1' <> cast(1 as double) FROM t; +SELECT '1' <> cast(1 as decimal(10, 0)) FROM t; +SELECT '1' <> '1' FROM t; +SELECT '1' <> cast('1' as binary) FROM t; +SELECT '1' <> cast(1 as boolean) FROM t; +SELECT '1' <> cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT '1' <> cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as tinyint) < '1' FROM t; +SELECT cast(1 as smallint) < '1' FROM t; +SELECT cast(1 as int) < '1' FROM t; +SELECT cast(1 as bigint) < '1' FROM t; +SELECT cast(1 as float) < '1' FROM t; +SELECT cast(1 as double) < '1' FROM t; +SELECT cast(1 as decimal(10, 0)) < '1' FROM t; +SELECT '1' < '1' FROM t; +SELECT cast('1' as binary) < '1' FROM t; +SELECT cast(1 as boolean) < '1' FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) < '1' FROM t; +SELECT cast('2017-12-11 09:30:00' as date) < '1' FROM t; + +SELECT cast(1 as tinyint) <= '1' FROM t; +SELECT cast(1 as smallint) <= '1' FROM t; +SELECT cast(1 as int) <= '1' FROM t; +SELECT cast(1 as bigint) <= '1' FROM t; +SELECT cast(1 as float) <= '1' FROM t; +SELECT cast(1 as double) <= '1' FROM t; +SELECT cast(1 as decimal(10, 0)) <= '1' FROM t; +SELECT '1' <= '1' FROM t; +SELECT cast('1' as binary) <= '1' FROM t; +SELECT cast(1 as boolean) <= '1' FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <= '1' FROM t; +SELECT cast('2017-12-11 09:30:00' as date) <= '1' FROM t; + +SELECT cast(1 as tinyint) > '1' FROM t; +SELECT cast(1 as smallint) > '1' FROM t; +SELECT cast(1 as int) > '1' FROM t; +SELECT cast(1 as bigint) > '1' FROM t; +SELECT cast(1 as float) > '1' FROM t; +SELECT cast(1 as double) > '1' FROM t; +SELECT cast(1 as decimal(10, 0)) > '1' FROM t; +SELECT '1' > '1' FROM t; +SELECT cast('1' as binary) > '1' FROM t; +SELECT cast(1 as boolean) > '1' FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) > '1' FROM t; +SELECT cast('2017-12-11 09:30:00' as date) > '1' FROM t; + +SELECT cast(1 as tinyint) >= '1' FROM t; +SELECT cast(1 as smallint) >= '1' FROM t; +SELECT cast(1 as int) >= '1' FROM t; +SELECT cast(1 as bigint) >= '1' FROM t; +SELECT cast(1 as float) >= '1' FROM t; +SELECT cast(1 as double) >= '1' FROM t; +SELECT cast(1 as decimal(10, 0)) >= '1' FROM t; +SELECT '1' >= '1' FROM t; +SELECT cast('1' as binary) >= '1' FROM t; +SELECT cast(1 as boolean) >= '1' FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) >= '1' FROM t; +SELECT cast('2017-12-11 09:30:00' as date) >= '1' FROM t; + +SELECT cast(1 as tinyint) <> '1' FROM t; +SELECT cast(1 as smallint) <> '1' FROM t; +SELECT cast(1 as int) <> '1' FROM t; +SELECT cast(1 as bigint) <> '1' FROM t; +SELECT cast(1 as float) <> '1' FROM t; +SELECT cast(1 as double) <> '1' FROM t; +SELECT cast(1 as decimal(10, 0)) <> '1' FROM t; +SELECT '1' <> '1' FROM t; +SELECT cast('1' as binary) <> '1' FROM t; +SELECT cast(1 as boolean) <> '1' FROM t; +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <> '1' FROM t; +SELECT cast('2017-12-11 09:30:00' as date) <> '1' FROM t; + +-- Functions +SELECT abs('1') FROM t; +SELECT sum('1') FROM t; +SELECT avg('1') FROM t; +SELECT stddev_pop('1') FROM t; +SELECT stddev_samp('1') FROM t; +SELECT - '1' FROM t; +SELECT + '1' FROM t; +SELECT var_pop('1') FROM t; +SELECT var_samp('1') FROM t; +SELECT skewness('1') FROM t; +SELECT kurtosis('1') FROM t; diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/stringCastAndExpressions.sql b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/stringCastAndExpressions.sql new file 
mode 100644
index 0000000000000..f17adb56dee91
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/stringCastAndExpressions.sql
@@ -0,0 +1,57 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+CREATE TEMPORARY VIEW t AS SELECT 'aa' as a;
+
+-- casting to data types that cannot represent the string input returns NULL
+select cast(a as byte) from t;
+select cast(a as short) from t;
+select cast(a as int) from t;
+select cast(a as long) from t;
+select cast(a as float) from t;
+select cast(a as double) from t;
+select cast(a as decimal) from t;
+select cast(a as boolean) from t;
+select cast(a as timestamp) from t;
+select cast(a as date) from t;
+-- casting to binary works correctly
+select cast(a as binary) from t;
+-- casting to array, struct or map throws an exception
+select cast(a as array) from t;
+select cast(a as struct) from t;
+select cast(a as map) from t;
+
+-- all timestamp/date expressions return NULL if bad input strings are provided
+select to_timestamp(a) from t;
+select to_timestamp('2018-01-01', a) from t;
+select to_unix_timestamp(a) from t;
+select to_unix_timestamp('2018-01-01', a) from t;
+select unix_timestamp(a) from t;
+select unix_timestamp('2018-01-01', a) from t;
+select from_unixtime(a) from t;
+select from_unixtime('2018-01-01', a) from t;
+select next_day(a, 'MO') from t;
+select next_day('2018-01-01', a) from t;
+select trunc(a, 'MM') from t;
+select trunc('2018-01-01', a) from t;
+
+-- some functions return NULL if bad input is provided
+select unhex('-123');
+select sha2(a, a) from t;
+select get_json_object(a, a) from t;
+select json_tuple(a, a) from t;
+select from_json(a, 'a INT') from t;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/widenSetOperationTypes.sql b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/widenSetOperationTypes.sql
new file mode 100644
index 0000000000000..66e9689850d93
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/widenSetOperationTypes.sql
@@ -0,0 +1,175 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +CREATE TEMPORARY VIEW t AS SELECT 1; + +-- UNION +SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as tinyint) FROM t; +SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as smallint) FROM t; +SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as int) FROM t; +SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as bigint) FROM t; +SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as float) FROM t; +SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as double) FROM t; +SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t; +SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as string) FROM t; +SELECT cast(1 as tinyint) FROM t UNION SELECT cast('2' as binary) FROM t; +SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as boolean) FROM t; +SELECT cast(1 as tinyint) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as tinyint) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as tinyint) FROM t; +SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as smallint) FROM t; +SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as int) FROM t; +SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as bigint) FROM t; +SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as float) FROM t; +SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as double) FROM t; +SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t; +SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as string) FROM t; +SELECT cast(1 as smallint) FROM t UNION SELECT cast('2' as binary) FROM t; +SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as boolean) FROM t; +SELECT cast(1 as smallint) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as smallint) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as int) FROM t UNION SELECT cast(2 as tinyint) FROM t; +SELECT cast(1 as int) FROM t UNION SELECT cast(2 as smallint) FROM t; +SELECT cast(1 as int) FROM t UNION SELECT cast(2 as int) FROM t; +SELECT cast(1 as int) FROM t UNION SELECT cast(2 as bigint) FROM t; +SELECT cast(1 as int) FROM t UNION SELECT cast(2 as float) FROM t; +SELECT cast(1 as int) FROM t UNION SELECT cast(2 as double) FROM t; +SELECT cast(1 as int) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t; +SELECT cast(1 as int) FROM t UNION SELECT cast(2 as string) FROM t; +SELECT cast(1 as int) FROM t UNION SELECT cast('2' as binary) FROM t; +SELECT cast(1 as int) FROM t UNION SELECT cast(2 as boolean) FROM t; +SELECT cast(1 as int) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as int) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as tinyint) FROM t; +SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as smallint) FROM t; +SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as int) FROM t; +SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as bigint) FROM t; +SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as float) FROM t; +SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as double) FROM t; +SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t; +SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as string) FROM t; +SELECT cast(1 as bigint) FROM t UNION SELECT 
cast('2' as binary) FROM t; +SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as boolean) FROM t; +SELECT cast(1 as bigint) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as bigint) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as float) FROM t UNION SELECT cast(2 as tinyint) FROM t; +SELECT cast(1 as float) FROM t UNION SELECT cast(2 as smallint) FROM t; +SELECT cast(1 as float) FROM t UNION SELECT cast(2 as int) FROM t; +SELECT cast(1 as float) FROM t UNION SELECT cast(2 as bigint) FROM t; +SELECT cast(1 as float) FROM t UNION SELECT cast(2 as float) FROM t; +SELECT cast(1 as float) FROM t UNION SELECT cast(2 as double) FROM t; +SELECT cast(1 as float) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t; +SELECT cast(1 as float) FROM t UNION SELECT cast(2 as string) FROM t; +SELECT cast(1 as float) FROM t UNION SELECT cast('2' as binary) FROM t; +SELECT cast(1 as float) FROM t UNION SELECT cast(2 as boolean) FROM t; +SELECT cast(1 as float) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as float) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as double) FROM t UNION SELECT cast(2 as tinyint) FROM t; +SELECT cast(1 as double) FROM t UNION SELECT cast(2 as smallint) FROM t; +SELECT cast(1 as double) FROM t UNION SELECT cast(2 as int) FROM t; +SELECT cast(1 as double) FROM t UNION SELECT cast(2 as bigint) FROM t; +SELECT cast(1 as double) FROM t UNION SELECT cast(2 as float) FROM t; +SELECT cast(1 as double) FROM t UNION SELECT cast(2 as double) FROM t; +SELECT cast(1 as double) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t; +SELECT cast(1 as double) FROM t UNION SELECT cast(2 as string) FROM t; +SELECT cast(1 as double) FROM t UNION SELECT cast('2' as binary) FROM t; +SELECT cast(1 as double) FROM t UNION SELECT cast(2 as boolean) FROM t; +SELECT cast(1 as double) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as double) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as tinyint) FROM t; +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as smallint) FROM t; +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as int) FROM t; +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as bigint) FROM t; +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as float) FROM t; +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as double) FROM t; +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t; +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as string) FROM t; +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast('2' as binary) FROM t; +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as boolean) FROM t; +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as string) FROM t UNION SELECT cast(2 as tinyint) FROM t; +SELECT cast(1 as string) FROM t UNION SELECT cast(2 as smallint) FROM t; +SELECT cast(1 as string) FROM t UNION SELECT cast(2 as int) FROM t; +SELECT cast(1 as string) FROM t UNION SELECT cast(2 as bigint) FROM t; +SELECT cast(1 as string) FROM t UNION SELECT cast(2 as float) FROM t; +SELECT cast(1 as string) FROM t UNION SELECT cast(2 as double) 
FROM t; +SELECT cast(1 as string) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t; +SELECT cast(1 as string) FROM t UNION SELECT cast(2 as string) FROM t; +SELECT cast(1 as string) FROM t UNION SELECT cast('2' as binary) FROM t; +SELECT cast(1 as string) FROM t UNION SELECT cast(2 as boolean) FROM t; +SELECT cast(1 as string) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as string) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as tinyint) FROM t; +SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as smallint) FROM t; +SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as int) FROM t; +SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as bigint) FROM t; +SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as float) FROM t; +SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as double) FROM t; +SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t; +SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as string) FROM t; +SELECT cast('1' as binary) FROM t UNION SELECT cast('2' as binary) FROM t; +SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as boolean) FROM t; +SELECT cast('1' as binary) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast('1' as binary) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as tinyint) FROM t; +SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as smallint) FROM t; +SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as int) FROM t; +SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as bigint) FROM t; +SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as float) FROM t; +SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as double) FROM t; +SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t; +SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as string) FROM t; +SELECT cast(1 as boolean) FROM t UNION SELECT cast('2' as binary) FROM t; +SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as boolean) FROM t; +SELECT cast(1 as boolean) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast(1 as boolean) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as tinyint) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as smallint) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as int) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as bigint) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as float) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as double) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as string) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast('2' as binary) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as boolean) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT 
cast('2017-12-11 09:30:00' as date) FROM t; + +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as tinyint) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as smallint) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as int) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as bigint) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as float) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as double) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as string) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast('2' as binary) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as boolean) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t; +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t; diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/windowFrameCoercion.sql b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/windowFrameCoercion.sql new file mode 100644 index 0000000000000..5cd3538757499 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/windowFrameCoercion.sql @@ -0,0 +1,44 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+-- + +CREATE TEMPORARY VIEW t AS SELECT 1; + +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as tinyint)) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as smallint)) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as int)) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as bigint)) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as float)) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as double)) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as decimal(10, 0))) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as string)) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('1' as binary)) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as boolean)) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('2017-12-11 09:30:00.0' as timestamp)) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('2017-12-11 09:30:00' as date)) FROM t; + +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as tinyint) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as smallint) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as int) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as bigint) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as float) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as double) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as decimal(10, 0)) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as string) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('1' as binary) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as boolean) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('2017-12-11 09:30:00.0' as timestamp) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t; +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('2017-12-11 09:30:00' as date) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t; diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index 7b2f46f6c2a66..bbb6851e69c7e 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -44,6 +44,7 @@ struct<> -- !query 4 output + -- !query 5 select current_date, current_timestamp from ttf1 -- !query 5 schema @@ -63,6 +64,7 @@ struct<> -- !query 6 output + -- !query 7 select current_date = current_date(), current_timestamp = current_timestamp(), a, b from ttf2 -- !query 7 schema diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 986bb01c13fe4..c1abc6dff754b 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 25 +-- Number of queries: 26 
-- !query 0 @@ -227,3 +227,17 @@ SELECT 1 FROM (SELECT COUNT(1) FROM testData WHERE false) t struct<1:int> -- !query 24 output 1 + + +-- !query 25 +SELECT 1 from ( + SELECT 1 AS z, + MIN(a.x) + FROM (select 1 as x) a + WHERE false +) b +where b.z != b.z +-- !query 25 schema +struct<1:int> +-- !query 25 output + diff --git a/sql/core/src/test/resources/sql-tests/results/join-empty-relation.sql.out b/sql/core/src/test/resources/sql-tests/results/join-empty-relation.sql.out new file mode 100644 index 0000000000000..857073a827f24 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/join-empty-relation.sql.out @@ -0,0 +1,194 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 24 + + +-- !query 0 +CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1) AS GROUPING(a) +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1) AS GROUPING(a) +-- !query 1 schema +struct<> +-- !query 1 output + + + +-- !query 2 +CREATE TEMPORARY VIEW empty_table as SELECT a FROM t2 WHERE false +-- !query 2 schema +struct<> +-- !query 2 output + + + +-- !query 3 +SELECT * FROM t1 INNER JOIN empty_table +-- !query 3 schema +struct<a:int,a:int> +-- !query 3 output + + + +-- !query 4 +SELECT * FROM t1 CROSS JOIN empty_table +-- !query 4 schema +struct<a:int,a:int> +-- !query 4 output + + + +-- !query 5 +SELECT * FROM t1 LEFT OUTER JOIN empty_table +-- !query 5 schema +struct<a:int,a:int> +-- !query 5 output +1 NULL + + +-- !query 6 +SELECT * FROM t1 RIGHT OUTER JOIN empty_table +-- !query 6 schema +struct<a:int,a:int> +-- !query 6 output + + + +-- !query 7 +SELECT * FROM t1 FULL OUTER JOIN empty_table +-- !query 7 schema +struct<a:int,a:int> +-- !query 7 output +1 NULL + + +-- !query 8 +SELECT * FROM t1 LEFT SEMI JOIN empty_table +-- !query 8 schema +struct<a:int> +-- !query 8 output + + + +-- !query 9 +SELECT * FROM t1 LEFT ANTI JOIN empty_table +-- !query 9 schema +struct<a:int> +-- !query 9 output +1 + + +-- !query 10 +SELECT * FROM empty_table INNER JOIN t1 +-- !query 10 schema +struct<a:int,a:int> +-- !query 10 output + + + +-- !query 11 +SELECT * FROM empty_table CROSS JOIN t1 +-- !query 11 schema +struct<a:int,a:int> +-- !query 11 output + + + +-- !query 12 +SELECT * FROM empty_table LEFT OUTER JOIN t1 +-- !query 12 schema +struct<a:int,a:int> +-- !query 12 output + + + +-- !query 13 +SELECT * FROM empty_table RIGHT OUTER JOIN t1 +-- !query 13 schema +struct<a:int,a:int> +-- !query 13 output +NULL 1 + + +-- !query 14 +SELECT * FROM empty_table FULL OUTER JOIN t1 +-- !query 14 schema +struct<a:int,a:int> +-- !query 14 output +NULL 1 + + +-- !query 15 +SELECT * FROM empty_table LEFT SEMI JOIN t1 +-- !query 15 schema +struct<a:int> +-- !query 15 output + + + +-- !query 16 +SELECT * FROM empty_table LEFT ANTI JOIN t1 +-- !query 16 schema +struct<a:int> +-- !query 16 output + + + +-- !query 17 +SELECT * FROM empty_table INNER JOIN empty_table +-- !query 17 schema +struct<a:int,a:int> +-- !query 17 output + + + +-- !query 18 +SELECT * FROM empty_table CROSS JOIN empty_table +-- !query 18 schema +struct<a:int,a:int> +-- !query 18 output + + + +-- !query 19 +SELECT * FROM empty_table LEFT OUTER JOIN empty_table +-- !query 19 schema +struct<a:int,a:int> +-- !query 19 output + + + +-- !query 20 +SELECT * FROM empty_table RIGHT OUTER JOIN empty_table +-- !query 20 schema +struct<a:int,a:int> +-- !query 20 output + + + +-- !query 21 +SELECT * FROM empty_table FULL OUTER JOIN empty_table +-- !query 21 schema +struct<a:int,a:int> +-- !query 21 output + + + +-- !query 22 +SELECT * FROM empty_table LEFT SEMI JOIN empty_table +-- !query 22 schema +struct<a:int> +-- !query 22 output + + + +-- !query 23 +SELECT * FROM empty_table LEFT ANTI JOIN empty_table +--
!query 23 schema +struct<a:int> +-- !query 23 output + diff --git a/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out index 8cd0d51da64f5..d51f6d37e4b41 100644 --- a/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 31 +-- Number of queries: 32 -- !query 0 @@ -34,225 +34,225 @@ struct<(CAST(1.5 AS DOUBLE) = CAST(1.51 AS DOUBLE)):boolean> false --- !query 3 -select 1 > '1' --- !query 3 schema -struct<(1 > CAST(1 AS INT)):boolean> --- !query 3 output -false - - -- !query 4 -select 2 > '1.0' +select 1 > '1' -- !query 4 schema -struct<(2 > CAST(1.0 AS INT)):boolean> +struct<(1 > CAST(1 AS INT)):boolean> -- !query 4 output -true +false -- !query 5 -select 2 > '2.0' +select 2 > '1.0' -- !query 5 schema -struct<(2 > CAST(2.0 AS INT)):boolean> +struct<(2 > CAST(1.0 AS INT)):boolean> -- !query 5 output -false +true -- !query 6 -select 2 > '2.2' +select 2 > '2.0' -- !query 6 schema -struct<(2 > CAST(2.2 AS INT)):boolean> +struct<(2 > CAST(2.0 AS INT)):boolean> -- !query 6 output false -- !query 7 -select '1.5' > 0.5 +select 2 > '2.2' -- !query 7 schema -struct<(CAST(1.5 AS DOUBLE) > CAST(0.5 AS DOUBLE)):boolean> +struct<(2 > CAST(2.2 AS INT)):boolean> -- !query 7 output -true +false -- !query 8 -select to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52') +select '1.5' > 0.5 -- !query 8 schema -struct<(to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52')):boolean> +struct<(CAST(1.5 AS DOUBLE) > CAST(0.5 AS DOUBLE)):boolean> -- !query 8 output -false +true -- !query 9 -select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52' +select to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52') -- !query 9 schema -struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) > 2009-07-30 04:17:52):boolean> +struct<(to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52')):boolean> -- !query 9 output false -- !query 10 -select 1 >= '1' +select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52' -- !query 10 schema -struct<(1 >= CAST(1 AS INT)):boolean> +struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) > 2009-07-30 04:17:52):boolean> -- !query 10 output -true +false -- !query 11 -select 2 >= '1.0' +select 1 >= '1' -- !query 11 schema -struct<(2 >= CAST(1.0 AS INT)):boolean> +struct<(1 >= CAST(1 AS INT)):boolean> -- !query 11 output true -- !query 12 -select 2 >= '2.0' +select 2 >= '1.0' -- !query 12 schema -struct<(2 >= CAST(2.0 AS INT)):boolean> +struct<(2 >= CAST(1.0 AS INT)):boolean> -- !query 12 output true -- !query 13 -select 2.0 >= '2.2' +select 2 >= '2.0' -- !query 13 schema -struct<(CAST(2.0 AS DOUBLE) >= CAST(2.2 AS DOUBLE)):boolean> +struct<(2 >= CAST(2.0 AS INT)):boolean> -- !query 13 output -false +true -- !query 14 -select '1.5' >= 0.5 +select 2.0 >= '2.2' -- !query 14 schema -struct<(CAST(1.5 AS DOUBLE) >= CAST(0.5 AS DOUBLE)):boolean> +struct<(CAST(2.0 AS DOUBLE) >= CAST(2.2 AS DOUBLE)):boolean> -- !query 14 output -true +false -- !query 15 -select to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52') +select '1.5' >= 0.5 -- !query 15 schema -struct<(to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52')):boolean> +struct<(CAST(1.5 AS DOUBLE) >= CAST(0.5 AS DOUBLE)):boolean> -- !query 15 output true -- !query 16 -select to_date('2009-07-30 04:17:52')
>= '2009-07-30 04:17:52' +select to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52') -- !query 16 schema -struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) >= 2009-07-30 04:17:52):boolean> +struct<(to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52')):boolean> -- !query 16 output -false +true -- !query 17 -select 1 < '1' +select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52' -- !query 17 schema -struct<(1 < CAST(1 AS INT)):boolean> +struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) >= 2009-07-30 04:17:52):boolean> -- !query 17 output false -- !query 18 -select 2 < '1.0' +select 1 < '1' -- !query 18 schema -struct<(2 < CAST(1.0 AS INT)):boolean> +struct<(1 < CAST(1 AS INT)):boolean> -- !query 18 output false -- !query 19 -select 2 < '2.0' +select 2 < '1.0' -- !query 19 schema -struct<(2 < CAST(2.0 AS INT)):boolean> +struct<(2 < CAST(1.0 AS INT)):boolean> -- !query 19 output false -- !query 20 -select 2.0 < '2.2' +select 2 < '2.0' -- !query 20 schema -struct<(CAST(2.0 AS DOUBLE) < CAST(2.2 AS DOUBLE)):boolean> +struct<(2 < CAST(2.0 AS INT)):boolean> -- !query 20 output -true +false -- !query 21 -select 0.5 < '1.5' +select 2.0 < '2.2' -- !query 21 schema -struct<(CAST(0.5 AS DOUBLE) < CAST(1.5 AS DOUBLE)):boolean> +struct<(CAST(2.0 AS DOUBLE) < CAST(2.2 AS DOUBLE)):boolean> -- !query 21 output true -- !query 22 -select to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52') +select 0.5 < '1.5' -- !query 22 schema -struct<(to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52')):boolean> +struct<(CAST(0.5 AS DOUBLE) < CAST(1.5 AS DOUBLE)):boolean> -- !query 22 output -false +true -- !query 23 -select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52' +select to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52') -- !query 23 schema -struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) < 2009-07-30 04:17:52):boolean> +struct<(to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52')):boolean> -- !query 23 output -true +false -- !query 24 -select 1 <= '1' +select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52' -- !query 24 schema -struct<(1 <= CAST(1 AS INT)):boolean> +struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) < 2009-07-30 04:17:52):boolean> -- !query 24 output true -- !query 25 -select 2 <= '1.0' +select 1 <= '1' -- !query 25 schema -struct<(2 <= CAST(1.0 AS INT)):boolean> +struct<(1 <= CAST(1 AS INT)):boolean> -- !query 25 output -false +true -- !query 26 -select 2 <= '2.0' +select 2 <= '1.0' -- !query 26 schema -struct<(2 <= CAST(2.0 AS INT)):boolean> +struct<(2 <= CAST(1.0 AS INT)):boolean> -- !query 26 output -true +false -- !query 27 -select 2.0 <= '2.2' +select 2 <= '2.0' -- !query 27 schema -struct<(CAST(2.0 AS DOUBLE) <= CAST(2.2 AS DOUBLE)):boolean> +struct<(2 <= CAST(2.0 AS INT)):boolean> -- !query 27 output true -- !query 28 -select 0.5 <= '1.5' +select 2.0 <= '2.2' -- !query 28 schema -struct<(CAST(0.5 AS DOUBLE) <= CAST(1.5 AS DOUBLE)):boolean> +struct<(CAST(2.0 AS DOUBLE) <= CAST(2.2 AS DOUBLE)):boolean> -- !query 28 output true -- !query 29 -select to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52') +select 0.5 <= '1.5' -- !query 29 schema -struct<(to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52')):boolean> +struct<(CAST(0.5 AS DOUBLE) <= CAST(1.5 AS DOUBLE)):boolean> -- !query 29 output true -- !query 30 -select to_date('2009-07-30 04:17:52') <= '2009-07-30 04:17:52' +select to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52') -- !query 30 
schema -struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) <= 2009-07-30 04:17:52):boolean> +struct<(to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52')):boolean> -- !query 30 output true + + +-- !query 31 +select to_date('2009-07-30 04:17:52') <= '2009-07-30 04:17:52' +-- !query 31 schema +struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) <= 2009-07-30 04:17:52):boolean> +-- !query 31 output +true diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 2d9b3d7d2ca33..d5f8705a35ed6 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 12 +-- Number of queries: 15 -- !query 0 @@ -118,3 +118,46 @@ select right(null, -2), right("abcd", -2), right("abcd", 0), right("abcd", 'a') struct<right(NULL, -2):string,right(abcd, -2):string,right(abcd, 0):string,right(abcd, CAST(a AS INT)):string> -- !query 11 output NULL NULL + + +-- !query 12 +set spark.sql.function.concatBinaryAsString=false +-- !query 12 schema +struct<key:string,value:string> +-- !query 12 output +spark.sql.function.concatBinaryAsString false + + +-- !query 13 +EXPLAIN SELECT ((col1 || col2) || (col3 || col4)) col +FROM ( + SELECT + string(id) col1, + string(id + 1) col2, + encode(string(id + 2), 'utf-8') col3, + encode(string(id + 3), 'utf-8') col4 + FROM range(10) +) +-- !query 13 schema +struct<plan:string> +-- !query 13 output +== Physical Plan == +*Project [concat(cast(id#xL as string), cast((id#xL + 1) as string), cast(encode(cast((id#xL + 2) as string), utf-8) as string), cast(encode(cast((id#xL + 3) as string), utf-8) as string)) AS col#x] ++- *Range (0, 10, step=1, splits=2) + + +-- !query 14 +EXPLAIN SELECT (col1 || (col3 || col4)) col +FROM ( + SELECT + string(id) col1, + encode(string(id + 2), 'utf-8') col3, + encode(string(id + 3), 'utf-8') col4 + FROM range(10) +) +-- !query 14 schema +struct<plan:string> +-- !query 14 output +== Physical Plan == +*Project [concat(cast(id#xL as string), cast(encode(cast((id#xL + 2) as string), utf-8) as string), cast(encode(cast((id#xL + 3) as string), utf-8) as string)) AS col#x] ++- *Range (0, 10, step=1, splits=2) diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/binaryComparison.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/binaryComparison.sql.out new file mode 100644 index 0000000000000..2914d6015ea88 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/binaryComparison.sql.out @@ -0,0 +1,2146 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 265 + + +-- !query 0 +CREATE TEMPORARY VIEW t AS SELECT 1 +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +SELECT cast(1 as binary) = '1' FROM t +-- !query 1 schema +struct<> +-- !query 1 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 7 + + +-- !query 2 +SELECT cast(1 as binary) > '2' FROM t +-- !query 2 schema +struct<> +-- !query 2 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 7 + + +-- !query 3 +SELECT cast(1 as binary) >= '2' FROM t +-- !query 3 schema +struct<> +-- !query 3 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 7 + + +-- !query 4
+SELECT cast(1 as binary) < '2' FROM t +-- !query 4 schema +struct<> +-- !query 4 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 7 + + +-- !query 5 +SELECT cast(1 as binary) <= '2' FROM t +-- !query 5 schema +struct<> +-- !query 5 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 7 + + +-- !query 6 +SELECT cast(1 as binary) <> '2' FROM t +-- !query 6 schema +struct<> +-- !query 6 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 7 + + +-- !query 7 +SELECT cast(1 as binary) = cast(null as string) FROM t +-- !query 7 schema +struct<> +-- !query 7 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 7 + + +-- !query 8 +SELECT cast(1 as binary) > cast(null as string) FROM t +-- !query 8 schema +struct<> +-- !query 8 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 7 + + +-- !query 9 +SELECT cast(1 as binary) >= cast(null as string) FROM t +-- !query 9 schema +struct<> +-- !query 9 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 7 + + +-- !query 10 +SELECT cast(1 as binary) < cast(null as string) FROM t +-- !query 10 schema +struct<> +-- !query 10 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 7 + + +-- !query 11 +SELECT cast(1 as binary) <= cast(null as string) FROM t +-- !query 11 schema +struct<> +-- !query 11 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 7 + + +-- !query 12 +SELECT cast(1 as binary) <> cast(null as string) FROM t +-- !query 12 schema +struct<> +-- !query 12 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 7 + + +-- !query 13 +SELECT '1' = cast(1 as binary) FROM t +-- !query 13 schema +struct<> +-- !query 13 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 13 + + +-- !query 14 +SELECT '2' > cast(1 as binary) FROM t +-- !query 14 schema +struct<> +-- !query 14 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 13 + + +-- !query 15 +SELECT '2' >= cast(1 as binary) FROM t +-- !query 15 schema +struct<> +-- !query 15 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 14 + + +-- !query 16 +SELECT '2' < cast(1 as binary) FROM t +-- !query 16 schema +struct<> +-- !query 16 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 13 + + +-- !query 17 +SELECT '2' <= cast(1 as binary) FROM t +-- !query 17 schema +struct<> +-- !query 17 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data 
type mismatch: cannot cast int to binary; line 1 pos 14 + + +-- !query 18 +SELECT '2' <> cast(1 as binary) FROM t +-- !query 18 schema +struct<> +-- !query 18 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 14 + + +-- !query 19 +SELECT cast(null as string) = cast(1 as binary) FROM t +-- !query 19 schema +struct<> +-- !query 19 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 30 + + +-- !query 20 +SELECT cast(null as string) > cast(1 as binary) FROM t +-- !query 20 schema +struct<> +-- !query 20 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 30 + + +-- !query 21 +SELECT cast(null as string) >= cast(1 as binary) FROM t +-- !query 21 schema +struct<> +-- !query 21 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 31 + + +-- !query 22 +SELECT cast(null as string) < cast(1 as binary) FROM t +-- !query 22 schema +struct<> +-- !query 22 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 30 + + +-- !query 23 +SELECT cast(null as string) <= cast(1 as binary) FROM t +-- !query 23 schema +struct<> +-- !query 23 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 31 + + +-- !query 24 +SELECT cast(null as string) <> cast(1 as binary) FROM t +-- !query 24 schema +struct<> +-- !query 24 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(1 AS BINARY)' due to data type mismatch: cannot cast int to binary; line 1 pos 31 + + +-- !query 25 +SELECT cast(1 as tinyint) = '1' FROM t +-- !query 25 schema +struct<(CAST(1 AS TINYINT) = CAST(1 AS TINYINT)):boolean> +-- !query 25 output +true + + +-- !query 26 +SELECT cast(1 as tinyint) > '2' FROM t +-- !query 26 schema +struct<(CAST(1 AS TINYINT) > CAST(2 AS TINYINT)):boolean> +-- !query 26 output +false + + +-- !query 27 +SELECT cast(1 as tinyint) >= '2' FROM t +-- !query 27 schema +struct<(CAST(1 AS TINYINT) >= CAST(2 AS TINYINT)):boolean> +-- !query 27 output +false + + +-- !query 28 +SELECT cast(1 as tinyint) < '2' FROM t +-- !query 28 schema +struct<(CAST(1 AS TINYINT) < CAST(2 AS TINYINT)):boolean> +-- !query 28 output +true + + +-- !query 29 +SELECT cast(1 as tinyint) <= '2' FROM t +-- !query 29 schema +struct<(CAST(1 AS TINYINT) <= CAST(2 AS TINYINT)):boolean> +-- !query 29 output +true + + +-- !query 30 +SELECT cast(1 as tinyint) <> '2' FROM t +-- !query 30 schema +struct<(NOT (CAST(1 AS TINYINT) = CAST(2 AS TINYINT))):boolean> +-- !query 30 output +true + + +-- !query 31 +SELECT cast(1 as tinyint) = cast(null as string) FROM t +-- !query 31 schema +struct<(CAST(1 AS TINYINT) = CAST(CAST(NULL AS STRING) AS TINYINT)):boolean> +-- !query 31 output +NULL + + +-- !query 32 +SELECT cast(1 as tinyint) > cast(null as string) FROM t +-- !query 32 schema +struct<(CAST(1 AS TINYINT) > CAST(CAST(NULL AS STRING) AS TINYINT)):boolean> +-- !query 32 output +NULL + + +-- !query 33 +SELECT cast(1 as tinyint) >= cast(null as string) FROM t +-- !query 33 schema +struct<(CAST(1 AS TINYINT) >= CAST(CAST(NULL AS STRING) AS TINYINT)):boolean> +-- !query 33 output +NULL + + +-- 
!query 34 +SELECT cast(1 as tinyint) < cast(null as string) FROM t +-- !query 34 schema +struct<(CAST(1 AS TINYINT) < CAST(CAST(NULL AS STRING) AS TINYINT)):boolean> +-- !query 34 output +NULL + + +-- !query 35 +SELECT cast(1 as tinyint) <= cast(null as string) FROM t +-- !query 35 schema +struct<(CAST(1 AS TINYINT) <= CAST(CAST(NULL AS STRING) AS TINYINT)):boolean> +-- !query 35 output +NULL + + +-- !query 36 +SELECT cast(1 as tinyint) <> cast(null as string) FROM t +-- !query 36 schema +struct<(NOT (CAST(1 AS TINYINT) = CAST(CAST(NULL AS STRING) AS TINYINT))):boolean> +-- !query 36 output +NULL + + +-- !query 37 +SELECT '1' = cast(1 as tinyint) FROM t +-- !query 37 schema +struct<(CAST(1 AS TINYINT) = CAST(1 AS TINYINT)):boolean> +-- !query 37 output +true + + +-- !query 38 +SELECT '2' > cast(1 as tinyint) FROM t +-- !query 38 schema +struct<(CAST(2 AS TINYINT) > CAST(1 AS TINYINT)):boolean> +-- !query 38 output +true + + +-- !query 39 +SELECT '2' >= cast(1 as tinyint) FROM t +-- !query 39 schema +struct<(CAST(2 AS TINYINT) >= CAST(1 AS TINYINT)):boolean> +-- !query 39 output +true + + +-- !query 40 +SELECT '2' < cast(1 as tinyint) FROM t +-- !query 40 schema +struct<(CAST(2 AS TINYINT) < CAST(1 AS TINYINT)):boolean> +-- !query 40 output +false + + +-- !query 41 +SELECT '2' <= cast(1 as tinyint) FROM t +-- !query 41 schema +struct<(CAST(2 AS TINYINT) <= CAST(1 AS TINYINT)):boolean> +-- !query 41 output +false + + +-- !query 42 +SELECT '2' <> cast(1 as tinyint) FROM t +-- !query 42 schema +struct<(NOT (CAST(2 AS TINYINT) = CAST(1 AS TINYINT))):boolean> +-- !query 42 output +true + + +-- !query 43 +SELECT cast(null as string) = cast(1 as tinyint) FROM t +-- !query 43 schema +struct<(CAST(CAST(NULL AS STRING) AS TINYINT) = CAST(1 AS TINYINT)):boolean> +-- !query 43 output +NULL + + +-- !query 44 +SELECT cast(null as string) > cast(1 as tinyint) FROM t +-- !query 44 schema +struct<(CAST(CAST(NULL AS STRING) AS TINYINT) > CAST(1 AS TINYINT)):boolean> +-- !query 44 output +NULL + + +-- !query 45 +SELECT cast(null as string) >= cast(1 as tinyint) FROM t +-- !query 45 schema +struct<(CAST(CAST(NULL AS STRING) AS TINYINT) >= CAST(1 AS TINYINT)):boolean> +-- !query 45 output +NULL + + +-- !query 46 +SELECT cast(null as string) < cast(1 as tinyint) FROM t +-- !query 46 schema +struct<(CAST(CAST(NULL AS STRING) AS TINYINT) < CAST(1 AS TINYINT)):boolean> +-- !query 46 output +NULL + + +-- !query 47 +SELECT cast(null as string) <= cast(1 as tinyint) FROM t +-- !query 47 schema +struct<(CAST(CAST(NULL AS STRING) AS TINYINT) <= CAST(1 AS TINYINT)):boolean> +-- !query 47 output +NULL + + +-- !query 48 +SELECT cast(null as string) <> cast(1 as tinyint) FROM t +-- !query 48 schema +struct<(NOT (CAST(CAST(NULL AS STRING) AS TINYINT) = CAST(1 AS TINYINT))):boolean> +-- !query 48 output +NULL + + +-- !query 49 +SELECT cast(1 as smallint) = '1' FROM t +-- !query 49 schema +struct<(CAST(1 AS SMALLINT) = CAST(1 AS SMALLINT)):boolean> +-- !query 49 output +true + + +-- !query 50 +SELECT cast(1 as smallint) > '2' FROM t +-- !query 50 schema +struct<(CAST(1 AS SMALLINT) > CAST(2 AS SMALLINT)):boolean> +-- !query 50 output +false + + +-- !query 51 +SELECT cast(1 as smallint) >= '2' FROM t +-- !query 51 schema +struct<(CAST(1 AS SMALLINT) >= CAST(2 AS SMALLINT)):boolean> +-- !query 51 output +false + + +-- !query 52 +SELECT cast(1 as smallint) < '2' FROM t +-- !query 52 schema +struct<(CAST(1 AS SMALLINT) < CAST(2 AS SMALLINT)):boolean> +-- !query 52 output +true + + +-- !query 53 +SELECT cast(1 as smallint) <= '2' 
FROM t +-- !query 53 schema +struct<(CAST(1 AS SMALLINT) <= CAST(2 AS SMALLINT)):boolean> +-- !query 53 output +true + + +-- !query 54 +SELECT cast(1 as smallint) <> '2' FROM t +-- !query 54 schema +struct<(NOT (CAST(1 AS SMALLINT) = CAST(2 AS SMALLINT))):boolean> +-- !query 54 output +true + + +-- !query 55 +SELECT cast(1 as smallint) = cast(null as string) FROM t +-- !query 55 schema +struct<(CAST(1 AS SMALLINT) = CAST(CAST(NULL AS STRING) AS SMALLINT)):boolean> +-- !query 55 output +NULL + + +-- !query 56 +SELECT cast(1 as smallint) > cast(null as string) FROM t +-- !query 56 schema +struct<(CAST(1 AS SMALLINT) > CAST(CAST(NULL AS STRING) AS SMALLINT)):boolean> +-- !query 56 output +NULL + + +-- !query 57 +SELECT cast(1 as smallint) >= cast(null as string) FROM t +-- !query 57 schema +struct<(CAST(1 AS SMALLINT) >= CAST(CAST(NULL AS STRING) AS SMALLINT)):boolean> +-- !query 57 output +NULL + + +-- !query 58 +SELECT cast(1 as smallint) < cast(null as string) FROM t +-- !query 58 schema +struct<(CAST(1 AS SMALLINT) < CAST(CAST(NULL AS STRING) AS SMALLINT)):boolean> +-- !query 58 output +NULL + + +-- !query 59 +SELECT cast(1 as smallint) <= cast(null as string) FROM t +-- !query 59 schema +struct<(CAST(1 AS SMALLINT) <= CAST(CAST(NULL AS STRING) AS SMALLINT)):boolean> +-- !query 59 output +NULL + + +-- !query 60 +SELECT cast(1 as smallint) <> cast(null as string) FROM t +-- !query 60 schema +struct<(NOT (CAST(1 AS SMALLINT) = CAST(CAST(NULL AS STRING) AS SMALLINT))):boolean> +-- !query 60 output +NULL + + +-- !query 61 +SELECT '1' = cast(1 as smallint) FROM t +-- !query 61 schema +struct<(CAST(1 AS SMALLINT) = CAST(1 AS SMALLINT)):boolean> +-- !query 61 output +true + + +-- !query 62 +SELECT '2' > cast(1 as smallint) FROM t +-- !query 62 schema +struct<(CAST(2 AS SMALLINT) > CAST(1 AS SMALLINT)):boolean> +-- !query 62 output +true + + +-- !query 63 +SELECT '2' >= cast(1 as smallint) FROM t +-- !query 63 schema +struct<(CAST(2 AS SMALLINT) >= CAST(1 AS SMALLINT)):boolean> +-- !query 63 output +true + + +-- !query 64 +SELECT '2' < cast(1 as smallint) FROM t +-- !query 64 schema +struct<(CAST(2 AS SMALLINT) < CAST(1 AS SMALLINT)):boolean> +-- !query 64 output +false + + +-- !query 65 +SELECT '2' <= cast(1 as smallint) FROM t +-- !query 65 schema +struct<(CAST(2 AS SMALLINT) <= CAST(1 AS SMALLINT)):boolean> +-- !query 65 output +false + + +-- !query 66 +SELECT '2' <> cast(1 as smallint) FROM t +-- !query 66 schema +struct<(NOT (CAST(2 AS SMALLINT) = CAST(1 AS SMALLINT))):boolean> +-- !query 66 output +true + + +-- !query 67 +SELECT cast(null as string) = cast(1 as smallint) FROM t +-- !query 67 schema +struct<(CAST(CAST(NULL AS STRING) AS SMALLINT) = CAST(1 AS SMALLINT)):boolean> +-- !query 67 output +NULL + + +-- !query 68 +SELECT cast(null as string) > cast(1 as smallint) FROM t +-- !query 68 schema +struct<(CAST(CAST(NULL AS STRING) AS SMALLINT) > CAST(1 AS SMALLINT)):boolean> +-- !query 68 output +NULL + + +-- !query 69 +SELECT cast(null as string) >= cast(1 as smallint) FROM t +-- !query 69 schema +struct<(CAST(CAST(NULL AS STRING) AS SMALLINT) >= CAST(1 AS SMALLINT)):boolean> +-- !query 69 output +NULL + + +-- !query 70 +SELECT cast(null as string) < cast(1 as smallint) FROM t +-- !query 70 schema +struct<(CAST(CAST(NULL AS STRING) AS SMALLINT) < CAST(1 AS SMALLINT)):boolean> +-- !query 70 output +NULL + + +-- !query 71 +SELECT cast(null as string) <= cast(1 as smallint) FROM t +-- !query 71 schema +struct<(CAST(CAST(NULL AS STRING) AS SMALLINT) <= CAST(1 AS SMALLINT)):boolean> +-- 
!query 71 output +NULL + + +-- !query 72 +SELECT cast(null as string) <> cast(1 as smallint) FROM t +-- !query 72 schema +struct<(NOT (CAST(CAST(NULL AS STRING) AS SMALLINT) = CAST(1 AS SMALLINT))):boolean> +-- !query 72 output +NULL + + +-- !query 73 +SELECT cast(1 as int) = '1' FROM t +-- !query 73 schema +struct<(CAST(1 AS INT) = CAST(1 AS INT)):boolean> +-- !query 73 output +true + + +-- !query 74 +SELECT cast(1 as int) > '2' FROM t +-- !query 74 schema +struct<(CAST(1 AS INT) > CAST(2 AS INT)):boolean> +-- !query 74 output +false + + +-- !query 75 +SELECT cast(1 as int) >= '2' FROM t +-- !query 75 schema +struct<(CAST(1 AS INT) >= CAST(2 AS INT)):boolean> +-- !query 75 output +false + + +-- !query 76 +SELECT cast(1 as int) < '2' FROM t +-- !query 76 schema +struct<(CAST(1 AS INT) < CAST(2 AS INT)):boolean> +-- !query 76 output +true + + +-- !query 77 +SELECT cast(1 as int) <= '2' FROM t +-- !query 77 schema +struct<(CAST(1 AS INT) <= CAST(2 AS INT)):boolean> +-- !query 77 output +true + + +-- !query 78 +SELECT cast(1 as int) <> '2' FROM t +-- !query 78 schema +struct<(NOT (CAST(1 AS INT) = CAST(2 AS INT))):boolean> +-- !query 78 output +true + + +-- !query 79 +SELECT cast(1 as int) = cast(null as string) FROM t +-- !query 79 schema +struct<(CAST(1 AS INT) = CAST(CAST(NULL AS STRING) AS INT)):boolean> +-- !query 79 output +NULL + + +-- !query 80 +SELECT cast(1 as int) > cast(null as string) FROM t +-- !query 80 schema +struct<(CAST(1 AS INT) > CAST(CAST(NULL AS STRING) AS INT)):boolean> +-- !query 80 output +NULL + + +-- !query 81 +SELECT cast(1 as int) >= cast(null as string) FROM t +-- !query 81 schema +struct<(CAST(1 AS INT) >= CAST(CAST(NULL AS STRING) AS INT)):boolean> +-- !query 81 output +NULL + + +-- !query 82 +SELECT cast(1 as int) < cast(null as string) FROM t +-- !query 82 schema +struct<(CAST(1 AS INT) < CAST(CAST(NULL AS STRING) AS INT)):boolean> +-- !query 82 output +NULL + + +-- !query 83 +SELECT cast(1 as int) <= cast(null as string) FROM t +-- !query 83 schema +struct<(CAST(1 AS INT) <= CAST(CAST(NULL AS STRING) AS INT)):boolean> +-- !query 83 output +NULL + + +-- !query 84 +SELECT cast(1 as int) <> cast(null as string) FROM t +-- !query 84 schema +struct<(NOT (CAST(1 AS INT) = CAST(CAST(NULL AS STRING) AS INT))):boolean> +-- !query 84 output +NULL + + +-- !query 85 +SELECT '1' = cast(1 as int) FROM t +-- !query 85 schema +struct<(CAST(1 AS INT) = CAST(1 AS INT)):boolean> +-- !query 85 output +true + + +-- !query 86 +SELECT '2' > cast(1 as int) FROM t +-- !query 86 schema +struct<(CAST(2 AS INT) > CAST(1 AS INT)):boolean> +-- !query 86 output +true + + +-- !query 87 +SELECT '2' >= cast(1 as int) FROM t +-- !query 87 schema +struct<(CAST(2 AS INT) >= CAST(1 AS INT)):boolean> +-- !query 87 output +true + + +-- !query 88 +SELECT '2' < cast(1 as int) FROM t +-- !query 88 schema +struct<(CAST(2 AS INT) < CAST(1 AS INT)):boolean> +-- !query 88 output +false + + +-- !query 89 +SELECT '2' <> cast(1 as int) FROM t +-- !query 89 schema +struct<(NOT (CAST(2 AS INT) = CAST(1 AS INT))):boolean> +-- !query 89 output +true + + +-- !query 90 +SELECT '2' <= cast(1 as int) FROM t +-- !query 90 schema +struct<(CAST(2 AS INT) <= CAST(1 AS INT)):boolean> +-- !query 90 output +false + + +-- !query 91 +SELECT cast(null as string) = cast(1 as int) FROM t +-- !query 91 schema +struct<(CAST(CAST(NULL AS STRING) AS INT) = CAST(1 AS INT)):boolean> +-- !query 91 output +NULL + + +-- !query 92 +SELECT cast(null as string) > cast(1 as int) FROM t +-- !query 92 schema +struct<(CAST(CAST(NULL AS 
STRING) AS INT) > CAST(1 AS INT)):boolean> +-- !query 92 output +NULL + + +-- !query 93 +SELECT cast(null as string) >= cast(1 as int) FROM t +-- !query 93 schema +struct<(CAST(CAST(NULL AS STRING) AS INT) >= CAST(1 AS INT)):boolean> +-- !query 93 output +NULL + + +-- !query 94 +SELECT cast(null as string) < cast(1 as int) FROM t +-- !query 94 schema +struct<(CAST(CAST(NULL AS STRING) AS INT) < CAST(1 AS INT)):boolean> +-- !query 94 output +NULL + + +-- !query 95 +SELECT cast(null as string) <> cast(1 as int) FROM t +-- !query 95 schema +struct<(NOT (CAST(CAST(NULL AS STRING) AS INT) = CAST(1 AS INT))):boolean> +-- !query 95 output +NULL + + +-- !query 96 +SELECT cast(null as string) <= cast(1 as int) FROM t +-- !query 96 schema +struct<(CAST(CAST(NULL AS STRING) AS INT) <= CAST(1 AS INT)):boolean> +-- !query 96 output +NULL + + +-- !query 97 +SELECT cast(1 as bigint) = '1' FROM t +-- !query 97 schema +struct<(CAST(1 AS BIGINT) = CAST(1 AS BIGINT)):boolean> +-- !query 97 output +true + + +-- !query 98 +SELECT cast(1 as bigint) > '2' FROM t +-- !query 98 schema +struct<(CAST(1 AS BIGINT) > CAST(2 AS BIGINT)):boolean> +-- !query 98 output +false + + +-- !query 99 +SELECT cast(1 as bigint) >= '2' FROM t +-- !query 99 schema +struct<(CAST(1 AS BIGINT) >= CAST(2 AS BIGINT)):boolean> +-- !query 99 output +false + + +-- !query 100 +SELECT cast(1 as bigint) < '2' FROM t +-- !query 100 schema +struct<(CAST(1 AS BIGINT) < CAST(2 AS BIGINT)):boolean> +-- !query 100 output +true + + +-- !query 101 +SELECT cast(1 as bigint) <= '2' FROM t +-- !query 101 schema +struct<(CAST(1 AS BIGINT) <= CAST(2 AS BIGINT)):boolean> +-- !query 101 output +true + + +-- !query 102 +SELECT cast(1 as bigint) <> '2' FROM t +-- !query 102 schema +struct<(NOT (CAST(1 AS BIGINT) = CAST(2 AS BIGINT))):boolean> +-- !query 102 output +true + + +-- !query 103 +SELECT cast(1 as bigint) = cast(null as string) FROM t +-- !query 103 schema +struct<(CAST(1 AS BIGINT) = CAST(CAST(NULL AS STRING) AS BIGINT)):boolean> +-- !query 103 output +NULL + + +-- !query 104 +SELECT cast(1 as bigint) > cast(null as string) FROM t +-- !query 104 schema +struct<(CAST(1 AS BIGINT) > CAST(CAST(NULL AS STRING) AS BIGINT)):boolean> +-- !query 104 output +NULL + + +-- !query 105 +SELECT cast(1 as bigint) >= cast(null as string) FROM t +-- !query 105 schema +struct<(CAST(1 AS BIGINT) >= CAST(CAST(NULL AS STRING) AS BIGINT)):boolean> +-- !query 105 output +NULL + + +-- !query 106 +SELECT cast(1 as bigint) < cast(null as string) FROM t +-- !query 106 schema +struct<(CAST(1 AS BIGINT) < CAST(CAST(NULL AS STRING) AS BIGINT)):boolean> +-- !query 106 output +NULL + + +-- !query 107 +SELECT cast(1 as bigint) <= cast(null as string) FROM t +-- !query 107 schema +struct<(CAST(1 AS BIGINT) <= CAST(CAST(NULL AS STRING) AS BIGINT)):boolean> +-- !query 107 output +NULL + + +-- !query 108 +SELECT cast(1 as bigint) <> cast(null as string) FROM t +-- !query 108 schema +struct<(NOT (CAST(1 AS BIGINT) = CAST(CAST(NULL AS STRING) AS BIGINT))):boolean> +-- !query 108 output +NULL + + +-- !query 109 +SELECT '1' = cast(1 as bigint) FROM t +-- !query 109 schema +struct<(CAST(1 AS BIGINT) = CAST(1 AS BIGINT)):boolean> +-- !query 109 output +true + + +-- !query 110 +SELECT '2' > cast(1 as bigint) FROM t +-- !query 110 schema +struct<(CAST(2 AS BIGINT) > CAST(1 AS BIGINT)):boolean> +-- !query 110 output +true + + +-- !query 111 +SELECT '2' >= cast(1 as bigint) FROM t +-- !query 111 schema +struct<(CAST(2 AS BIGINT) >= CAST(1 AS BIGINT)):boolean> +-- !query 111 output +true + + +-- 
!query 112 +SELECT '2' < cast(1 as bigint) FROM t +-- !query 112 schema +struct<(CAST(2 AS BIGINT) < CAST(1 AS BIGINT)):boolean> +-- !query 112 output +false + + +-- !query 113 +SELECT '2' <= cast(1 as bigint) FROM t +-- !query 113 schema +struct<(CAST(2 AS BIGINT) <= CAST(1 AS BIGINT)):boolean> +-- !query 113 output +false + + +-- !query 114 +SELECT '2' <> cast(1 as bigint) FROM t +-- !query 114 schema +struct<(NOT (CAST(2 AS BIGINT) = CAST(1 AS BIGINT))):boolean> +-- !query 114 output +true + + +-- !query 115 +SELECT cast(null as string) = cast(1 as bigint) FROM t +-- !query 115 schema +struct<(CAST(CAST(NULL AS STRING) AS BIGINT) = CAST(1 AS BIGINT)):boolean> +-- !query 115 output +NULL + + +-- !query 116 +SELECT cast(null as string) > cast(1 as bigint) FROM t +-- !query 116 schema +struct<(CAST(CAST(NULL AS STRING) AS BIGINT) > CAST(1 AS BIGINT)):boolean> +-- !query 116 output +NULL + + +-- !query 117 +SELECT cast(null as string) >= cast(1 as bigint) FROM t +-- !query 117 schema +struct<(CAST(CAST(NULL AS STRING) AS BIGINT) >= CAST(1 AS BIGINT)):boolean> +-- !query 117 output +NULL + + +-- !query 118 +SELECT cast(null as string) < cast(1 as bigint) FROM t +-- !query 118 schema +struct<(CAST(CAST(NULL AS STRING) AS BIGINT) < CAST(1 AS BIGINT)):boolean> +-- !query 118 output +NULL + + +-- !query 119 +SELECT cast(null as string) <= cast(1 as bigint) FROM t +-- !query 119 schema +struct<(CAST(CAST(NULL AS STRING) AS BIGINT) <= CAST(1 AS BIGINT)):boolean> +-- !query 119 output +NULL + + +-- !query 120 +SELECT cast(null as string) <> cast(1 as bigint) FROM t +-- !query 120 schema +struct<(NOT (CAST(CAST(NULL AS STRING) AS BIGINT) = CAST(1 AS BIGINT))):boolean> +-- !query 120 output +NULL + + +-- !query 121 +SELECT cast(1 as decimal(10, 0)) = '1' FROM t +-- !query 121 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> +-- !query 121 output +true + + +-- !query 122 +SELECT cast(1 as decimal(10, 0)) > '2' FROM t +-- !query 122 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) > CAST(2 AS DOUBLE)):boolean> +-- !query 122 output +false + + +-- !query 123 +SELECT cast(1 as decimal(10, 0)) >= '2' FROM t +-- !query 123 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) >= CAST(2 AS DOUBLE)):boolean> +-- !query 123 output +false + + +-- !query 124 +SELECT cast(1 as decimal(10, 0)) < '2' FROM t +-- !query 124 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) < CAST(2 AS DOUBLE)):boolean> +-- !query 124 output +true + + +-- !query 125 +SELECT cast(1 as decimal(10, 0)) <> '2' FROM t +-- !query 125 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(2 AS DOUBLE))):boolean> +-- !query 125 output +true + + +-- !query 126 +SELECT cast(1 as decimal(10, 0)) <= '2' FROM t +-- !query 126 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) <= CAST(2 AS DOUBLE)):boolean> +-- !query 126 output +true + + +-- !query 127 +SELECT cast(1 as decimal(10, 0)) = cast(null as string) FROM t +-- !query 127 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(CAST(NULL AS STRING) AS DOUBLE)):boolean> +-- !query 127 output +NULL + + +-- !query 128 +SELECT cast(1 as decimal(10, 0)) > cast(null as string) FROM t +-- !query 128 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) > CAST(CAST(NULL AS STRING) AS DOUBLE)):boolean> +-- !query 128 output +NULL + + +-- !query 129 +SELECT cast(1 as decimal(10, 0)) >= cast(null as string) FROM t +-- !query 129 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) >= CAST(CAST(NULL AS STRING) 
AS DOUBLE)):boolean> +-- !query 129 output +NULL + + +-- !query 130 +SELECT cast(1 as decimal(10, 0)) < cast(null as string) FROM t +-- !query 130 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) < CAST(CAST(NULL AS STRING) AS DOUBLE)):boolean> +-- !query 130 output +NULL + + +-- !query 131 +SELECT cast(1 as decimal(10, 0)) <> cast(null as string) FROM t +-- !query 131 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(CAST(NULL AS STRING) AS DOUBLE))):boolean> +-- !query 131 output +NULL + + +-- !query 132 +SELECT cast(1 as decimal(10, 0)) <= cast(null as string) FROM t +-- !query 132 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) <= CAST(CAST(NULL AS STRING) AS DOUBLE)):boolean> +-- !query 132 output +NULL + + +-- !query 133 +SELECT '1' = cast(1 as decimal(10, 0)) FROM t +-- !query 133 schema +struct<(CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 133 output +true + + +-- !query 134 +SELECT '2' > cast(1 as decimal(10, 0)) FROM t +-- !query 134 schema +struct<(CAST(2 AS DOUBLE) > CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 134 output +true + + +-- !query 135 +SELECT '2' >= cast(1 as decimal(10, 0)) FROM t +-- !query 135 schema +struct<(CAST(2 AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 135 output +true + + +-- !query 136 +SELECT '2' < cast(1 as decimal(10, 0)) FROM t +-- !query 136 schema +struct<(CAST(2 AS DOUBLE) < CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 136 output +false + + +-- !query 137 +SELECT '2' <= cast(1 as decimal(10, 0)) FROM t +-- !query 137 schema +struct<(CAST(2 AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 137 output +false + + +-- !query 138 +SELECT '2' <> cast(1 as decimal(10, 0)) FROM t +-- !query 138 schema +struct<(NOT (CAST(2 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> +-- !query 138 output +true + + +-- !query 139 +SELECT cast(null as string) = cast(1 as decimal(10, 0)) FROM t +-- !query 139 schema +struct<(CAST(CAST(NULL AS STRING) AS DOUBLE) = CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 139 output +NULL + + +-- !query 140 +SELECT cast(null as string) > cast(1 as decimal(10, 0)) FROM t +-- !query 140 schema +struct<(CAST(CAST(NULL AS STRING) AS DOUBLE) > CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 140 output +NULL + + +-- !query 141 +SELECT cast(null as string) >= cast(1 as decimal(10, 0)) FROM t +-- !query 141 schema +struct<(CAST(CAST(NULL AS STRING) AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 141 output +NULL + + +-- !query 142 +SELECT cast(null as string) < cast(1 as decimal(10, 0)) FROM t +-- !query 142 schema +struct<(CAST(CAST(NULL AS STRING) AS DOUBLE) < CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 142 output +NULL + + +-- !query 143 +SELECT cast(null as string) <= cast(1 as decimal(10, 0)) FROM t +-- !query 143 schema +struct<(CAST(CAST(NULL AS STRING) AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 143 output +NULL + + +-- !query 144 +SELECT cast(null as string) <> cast(1 as decimal(10, 0)) FROM t +-- !query 144 schema +struct<(NOT (CAST(CAST(NULL AS STRING) AS DOUBLE) = CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> +-- !query 144 output +NULL + + +-- !query 145 +SELECT cast(1 as double) = '1' FROM t +-- !query 145 schema +struct<(CAST(1 AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> +-- !query 145 output +true + + +-- !query 146 +SELECT cast(1 as 
double) > '2' FROM t +-- !query 146 schema +struct<(CAST(1 AS DOUBLE) > CAST(2 AS DOUBLE)):boolean> +-- !query 146 output +false + + +-- !query 147 +SELECT cast(1 as double) >= '2' FROM t +-- !query 147 schema +struct<(CAST(1 AS DOUBLE) >= CAST(2 AS DOUBLE)):boolean> +-- !query 147 output +false + + +-- !query 148 +SELECT cast(1 as double) < '2' FROM t +-- !query 148 schema +struct<(CAST(1 AS DOUBLE) < CAST(2 AS DOUBLE)):boolean> +-- !query 148 output +true + + +-- !query 149 +SELECT cast(1 as double) <= '2' FROM t +-- !query 149 schema +struct<(CAST(1 AS DOUBLE) <= CAST(2 AS DOUBLE)):boolean> +-- !query 149 output +true + + +-- !query 150 +SELECT cast(1 as double) <> '2' FROM t +-- !query 150 schema +struct<(NOT (CAST(1 AS DOUBLE) = CAST(2 AS DOUBLE))):boolean> +-- !query 150 output +true + + +-- !query 151 +SELECT cast(1 as double) = cast(null as string) FROM t +-- !query 151 schema +struct<(CAST(1 AS DOUBLE) = CAST(CAST(NULL AS STRING) AS DOUBLE)):boolean> +-- !query 151 output +NULL + + +-- !query 152 +SELECT cast(1 as double) > cast(null as string) FROM t +-- !query 152 schema +struct<(CAST(1 AS DOUBLE) > CAST(CAST(NULL AS STRING) AS DOUBLE)):boolean> +-- !query 152 output +NULL + + +-- !query 153 +SELECT cast(1 as double) >= cast(null as string) FROM t +-- !query 153 schema +struct<(CAST(1 AS DOUBLE) >= CAST(CAST(NULL AS STRING) AS DOUBLE)):boolean> +-- !query 153 output +NULL + + +-- !query 154 +SELECT cast(1 as double) < cast(null as string) FROM t +-- !query 154 schema +struct<(CAST(1 AS DOUBLE) < CAST(CAST(NULL AS STRING) AS DOUBLE)):boolean> +-- !query 154 output +NULL + + +-- !query 155 +SELECT cast(1 as double) <= cast(null as string) FROM t +-- !query 155 schema +struct<(CAST(1 AS DOUBLE) <= CAST(CAST(NULL AS STRING) AS DOUBLE)):boolean> +-- !query 155 output +NULL + + +-- !query 156 +SELECT cast(1 as double) <> cast(null as string) FROM t +-- !query 156 schema +struct<(NOT (CAST(1 AS DOUBLE) = CAST(CAST(NULL AS STRING) AS DOUBLE))):boolean> +-- !query 156 output +NULL + + +-- !query 157 +SELECT '1' = cast(1 as double) FROM t +-- !query 157 schema +struct<(CAST(1 AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> +-- !query 157 output +true + + +-- !query 158 +SELECT '2' > cast(1 as double) FROM t +-- !query 158 schema +struct<(CAST(2 AS DOUBLE) > CAST(1 AS DOUBLE)):boolean> +-- !query 158 output +true + + +-- !query 159 +SELECT '2' >= cast(1 as double) FROM t +-- !query 159 schema +struct<(CAST(2 AS DOUBLE) >= CAST(1 AS DOUBLE)):boolean> +-- !query 159 output +true + + +-- !query 160 +SELECT '2' < cast(1 as double) FROM t +-- !query 160 schema +struct<(CAST(2 AS DOUBLE) < CAST(1 AS DOUBLE)):boolean> +-- !query 160 output +false + + +-- !query 161 +SELECT '2' <= cast(1 as double) FROM t +-- !query 161 schema +struct<(CAST(2 AS DOUBLE) <= CAST(1 AS DOUBLE)):boolean> +-- !query 161 output +false + + +-- !query 162 +SELECT '2' <> cast(1 as double) FROM t +-- !query 162 schema +struct<(NOT (CAST(2 AS DOUBLE) = CAST(1 AS DOUBLE))):boolean> +-- !query 162 output +true + + +-- !query 163 +SELECT cast(null as string) = cast(1 as double) FROM t +-- !query 163 schema +struct<(CAST(CAST(NULL AS STRING) AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> +-- !query 163 output +NULL + + +-- !query 164 +SELECT cast(null as string) > cast(1 as double) FROM t +-- !query 164 schema +struct<(CAST(CAST(NULL AS STRING) AS DOUBLE) > CAST(1 AS DOUBLE)):boolean> +-- !query 164 output +NULL + + +-- !query 165 +SELECT cast(null as string) >= cast(1 as double) FROM t +-- !query 165 schema +struct<(CAST(CAST(NULL AS 
STRING) AS DOUBLE) >= CAST(1 AS DOUBLE)):boolean> +-- !query 165 output +NULL + + +-- !query 166 +SELECT cast(null as string) < cast(1 as double) FROM t +-- !query 166 schema +struct<(CAST(CAST(NULL AS STRING) AS DOUBLE) < CAST(1 AS DOUBLE)):boolean> +-- !query 166 output +NULL + + +-- !query 167 +SELECT cast(null as string) <= cast(1 as double) FROM t +-- !query 167 schema +struct<(CAST(CAST(NULL AS STRING) AS DOUBLE) <= CAST(1 AS DOUBLE)):boolean> +-- !query 167 output +NULL + + +-- !query 168 +SELECT cast(null as string) <> cast(1 as double) FROM t +-- !query 168 schema +struct<(NOT (CAST(CAST(NULL AS STRING) AS DOUBLE) = CAST(1 AS DOUBLE))):boolean> +-- !query 168 output +NULL + + +-- !query 169 +SELECT cast(1 as float) = '1' FROM t +-- !query 169 schema +struct<(CAST(1 AS FLOAT) = CAST(1 AS FLOAT)):boolean> +-- !query 169 output +true + + +-- !query 170 +SELECT cast(1 as float) > '2' FROM t +-- !query 170 schema +struct<(CAST(1 AS FLOAT) > CAST(2 AS FLOAT)):boolean> +-- !query 170 output +false + + +-- !query 171 +SELECT cast(1 as float) >= '2' FROM t +-- !query 171 schema +struct<(CAST(1 AS FLOAT) >= CAST(2 AS FLOAT)):boolean> +-- !query 171 output +false + + +-- !query 172 +SELECT cast(1 as float) < '2' FROM t +-- !query 172 schema +struct<(CAST(1 AS FLOAT) < CAST(2 AS FLOAT)):boolean> +-- !query 172 output +true + + +-- !query 173 +SELECT cast(1 as float) <= '2' FROM t +-- !query 173 schema +struct<(CAST(1 AS FLOAT) <= CAST(2 AS FLOAT)):boolean> +-- !query 173 output +true + + +-- !query 174 +SELECT cast(1 as float) <> '2' FROM t +-- !query 174 schema +struct<(NOT (CAST(1 AS FLOAT) = CAST(2 AS FLOAT))):boolean> +-- !query 174 output +true + + +-- !query 175 +SELECT cast(1 as float) = cast(null as string) FROM t +-- !query 175 schema +struct<(CAST(1 AS FLOAT) = CAST(CAST(NULL AS STRING) AS FLOAT)):boolean> +-- !query 175 output +NULL + + +-- !query 176 +SELECT cast(1 as float) > cast(null as string) FROM t +-- !query 176 schema +struct<(CAST(1 AS FLOAT) > CAST(CAST(NULL AS STRING) AS FLOAT)):boolean> +-- !query 176 output +NULL + + +-- !query 177 +SELECT cast(1 as float) >= cast(null as string) FROM t +-- !query 177 schema +struct<(CAST(1 AS FLOAT) >= CAST(CAST(NULL AS STRING) AS FLOAT)):boolean> +-- !query 177 output +NULL + + +-- !query 178 +SELECT cast(1 as float) < cast(null as string) FROM t +-- !query 178 schema +struct<(CAST(1 AS FLOAT) < CAST(CAST(NULL AS STRING) AS FLOAT)):boolean> +-- !query 178 output +NULL + + +-- !query 179 +SELECT cast(1 as float) <= cast(null as string) FROM t +-- !query 179 schema +struct<(CAST(1 AS FLOAT) <= CAST(CAST(NULL AS STRING) AS FLOAT)):boolean> +-- !query 179 output +NULL + + +-- !query 180 +SELECT cast(1 as float) <> cast(null as string) FROM t +-- !query 180 schema +struct<(NOT (CAST(1 AS FLOAT) = CAST(CAST(NULL AS STRING) AS FLOAT))):boolean> +-- !query 180 output +NULL + + +-- !query 181 +SELECT '1' = cast(1 as float) FROM t +-- !query 181 schema +struct<(CAST(1 AS FLOAT) = CAST(1 AS FLOAT)):boolean> +-- !query 181 output +true + + +-- !query 182 +SELECT '2' > cast(1 as float) FROM t +-- !query 182 schema +struct<(CAST(2 AS FLOAT) > CAST(1 AS FLOAT)):boolean> +-- !query 182 output +true + + +-- !query 183 +SELECT '2' >= cast(1 as float) FROM t +-- !query 183 schema +struct<(CAST(2 AS FLOAT) >= CAST(1 AS FLOAT)):boolean> +-- !query 183 output +true + + +-- !query 184 +SELECT '2' < cast(1 as float) FROM t +-- !query 184 schema +struct<(CAST(2 AS FLOAT) < CAST(1 AS FLOAT)):boolean> +-- !query 184 output +false + + +-- !query 185 +SELECT 
'2' <= cast(1 as float) FROM t +-- !query 185 schema +struct<(CAST(2 AS FLOAT) <= CAST(1 AS FLOAT)):boolean> +-- !query 185 output +false + + +-- !query 186 +SELECT '2' <> cast(1 as float) FROM t +-- !query 186 schema +struct<(NOT (CAST(2 AS FLOAT) = CAST(1 AS FLOAT))):boolean> +-- !query 186 output +true + + +-- !query 187 +SELECT cast(null as string) = cast(1 as float) FROM t +-- !query 187 schema +struct<(CAST(CAST(NULL AS STRING) AS FLOAT) = CAST(1 AS FLOAT)):boolean> +-- !query 187 output +NULL + + +-- !query 188 +SELECT cast(null as string) > cast(1 as float) FROM t +-- !query 188 schema +struct<(CAST(CAST(NULL AS STRING) AS FLOAT) > CAST(1 AS FLOAT)):boolean> +-- !query 188 output +NULL + + +-- !query 189 +SELECT cast(null as string) >= cast(1 as float) FROM t +-- !query 189 schema +struct<(CAST(CAST(NULL AS STRING) AS FLOAT) >= CAST(1 AS FLOAT)):boolean> +-- !query 189 output +NULL + + +-- !query 190 +SELECT cast(null as string) < cast(1 as float) FROM t +-- !query 190 schema +struct<(CAST(CAST(NULL AS STRING) AS FLOAT) < CAST(1 AS FLOAT)):boolean> +-- !query 190 output +NULL + + +-- !query 191 +SELECT cast(null as string) <= cast(1 as float) FROM t +-- !query 191 schema +struct<(CAST(CAST(NULL AS STRING) AS FLOAT) <= CAST(1 AS FLOAT)):boolean> +-- !query 191 output +NULL + + +-- !query 192 +SELECT cast(null as string) <> cast(1 as float) FROM t +-- !query 192 schema +struct<(NOT (CAST(CAST(NULL AS STRING) AS FLOAT) = CAST(1 AS FLOAT))):boolean> +-- !query 192 output +NULL + + +-- !query 193 +SELECT '1996-09-09' = date('1996-09-09') FROM t +-- !query 193 schema +struct<(1996-09-09 = CAST(CAST(1996-09-09 AS DATE) AS STRING)):boolean> +-- !query 193 output +true + + +-- !query 194 +SELECT '1996-9-10' > date('1996-09-09') FROM t +-- !query 194 schema +struct<(1996-9-10 > CAST(CAST(1996-09-09 AS DATE) AS STRING)):boolean> +-- !query 194 output +true + + +-- !query 195 +SELECT '1996-9-10' >= date('1996-09-09') FROM t +-- !query 195 schema +struct<(1996-9-10 >= CAST(CAST(1996-09-09 AS DATE) AS STRING)):boolean> +-- !query 195 output +true + + +-- !query 196 +SELECT '1996-9-10' < date('1996-09-09') FROM t +-- !query 196 schema +struct<(1996-9-10 < CAST(CAST(1996-09-09 AS DATE) AS STRING)):boolean> +-- !query 196 output +false + + +-- !query 197 +SELECT '1996-9-10' <= date('1996-09-09') FROM t +-- !query 197 schema +struct<(1996-9-10 <= CAST(CAST(1996-09-09 AS DATE) AS STRING)):boolean> +-- !query 197 output +false + + +-- !query 198 +SELECT '1996-9-10' <> date('1996-09-09') FROM t +-- !query 198 schema +struct<(NOT (1996-9-10 = CAST(CAST(1996-09-09 AS DATE) AS STRING))):boolean> +-- !query 198 output +true + + +-- !query 199 +SELECT cast(null as string) = date('1996-09-09') FROM t +-- !query 199 schema +struct<(CAST(NULL AS STRING) = CAST(CAST(1996-09-09 AS DATE) AS STRING)):boolean> +-- !query 199 output +NULL + + +-- !query 200 +SELECT cast(null as string)> date('1996-09-09') FROM t +-- !query 200 schema +struct<(CAST(NULL AS STRING) > CAST(CAST(1996-09-09 AS DATE) AS STRING)):boolean> +-- !query 200 output +NULL + + +-- !query 201 +SELECT cast(null as string)>= date('1996-09-09') FROM t +-- !query 201 schema +struct<(CAST(NULL AS STRING) >= CAST(CAST(1996-09-09 AS DATE) AS STRING)):boolean> +-- !query 201 output +NULL + + +-- !query 202 +SELECT cast(null as string)< date('1996-09-09') FROM t +-- !query 202 schema +struct<(CAST(NULL AS STRING) < CAST(CAST(1996-09-09 AS DATE) AS STRING)):boolean> +-- !query 202 output +NULL + + +-- !query 203 +SELECT cast(null as string)<= 
date('1996-09-09') FROM t +-- !query 203 schema +struct<(CAST(NULL AS STRING) <= CAST(CAST(1996-09-09 AS DATE) AS STRING)):boolean> +-- !query 203 output +NULL + + +-- !query 204 +SELECT cast(null as string)<> date('1996-09-09') FROM t +-- !query 204 schema +struct<(NOT (CAST(NULL AS STRING) = CAST(CAST(1996-09-09 AS DATE) AS STRING))):boolean> +-- !query 204 output +NULL + + +-- !query 205 +SELECT date('1996-09-09') = '1996-09-09' FROM t +-- !query 205 schema +struct<(CAST(CAST(1996-09-09 AS DATE) AS STRING) = 1996-09-09):boolean> +-- !query 205 output +true + + +-- !query 206 +SELECT date('1996-9-10') > '1996-09-09' FROM t +-- !query 206 schema +struct<(CAST(CAST(1996-9-10 AS DATE) AS STRING) > 1996-09-09):boolean> +-- !query 206 output +true + + +-- !query 207 +SELECT date('1996-9-10') >= '1996-09-09' FROM t +-- !query 207 schema +struct<(CAST(CAST(1996-9-10 AS DATE) AS STRING) >= 1996-09-09):boolean> +-- !query 207 output +true + + +-- !query 208 +SELECT date('1996-9-10') < '1996-09-09' FROM t +-- !query 208 schema +struct<(CAST(CAST(1996-9-10 AS DATE) AS STRING) < 1996-09-09):boolean> +-- !query 208 output +false + + +-- !query 209 +SELECT date('1996-9-10') <= '1996-09-09' FROM t +-- !query 209 schema +struct<(CAST(CAST(1996-9-10 AS DATE) AS STRING) <= 1996-09-09):boolean> +-- !query 209 output +false + + +-- !query 210 +SELECT date('1996-9-10') <> '1996-09-09' FROM t +-- !query 210 schema +struct<(NOT (CAST(CAST(1996-9-10 AS DATE) AS STRING) = 1996-09-09)):boolean> +-- !query 210 output +true + + +-- !query 211 +SELECT date('1996-09-09') = cast(null as string) FROM t +-- !query 211 schema +struct<(CAST(CAST(1996-09-09 AS DATE) AS STRING) = CAST(NULL AS STRING)):boolean> +-- !query 211 output +NULL + + +-- !query 212 +SELECT date('1996-9-10') > cast(null as string) FROM t +-- !query 212 schema +struct<(CAST(CAST(1996-9-10 AS DATE) AS STRING) > CAST(NULL AS STRING)):boolean> +-- !query 212 output +NULL + + +-- !query 213 +SELECT date('1996-9-10') >= cast(null as string) FROM t +-- !query 213 schema +struct<(CAST(CAST(1996-9-10 AS DATE) AS STRING) >= CAST(NULL AS STRING)):boolean> +-- !query 213 output +NULL + + +-- !query 214 +SELECT date('1996-9-10') < cast(null as string) FROM t +-- !query 214 schema +struct<(CAST(CAST(1996-9-10 AS DATE) AS STRING) < CAST(NULL AS STRING)):boolean> +-- !query 214 output +NULL + + +-- !query 215 +SELECT date('1996-9-10') <= cast(null as string) FROM t +-- !query 215 schema +struct<(CAST(CAST(1996-9-10 AS DATE) AS STRING) <= CAST(NULL AS STRING)):boolean> +-- !query 215 output +NULL + + +-- !query 216 +SELECT date('1996-9-10') <> cast(null as string) FROM t +-- !query 216 schema +struct<(NOT (CAST(CAST(1996-9-10 AS DATE) AS STRING) = CAST(NULL AS STRING))):boolean> +-- !query 216 output +NULL + + +-- !query 217 +SELECT '1996-09-09 12:12:12.4' = timestamp('1996-09-09 12:12:12.4') FROM t +-- !query 217 schema +struct<(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) = CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> +-- !query 217 output +true + + +-- !query 218 +SELECT '1996-09-09 12:12:12.5' > timestamp('1996-09-09 12:12:12.4') FROM t +-- !query 218 schema +struct<(1996-09-09 12:12:12.5 > CAST(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) AS STRING)):boolean> +-- !query 218 output +true + + +-- !query 219 +SELECT '1996-09-09 12:12:12.5' >= timestamp('1996-09-09 12:12:12.4') FROM t +-- !query 219 schema +struct<(1996-09-09 12:12:12.5 >= CAST(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) AS STRING)):boolean> +-- !query 219 output +true + + +-- !query 220 +SELECT 
'1996-09-09 12:12:12.5' < timestamp('1996-09-09 12:12:12.4') FROM t +-- !query 220 schema +struct<(1996-09-09 12:12:12.5 < CAST(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) AS STRING)):boolean> +-- !query 220 output +false + + +-- !query 221 +SELECT '1996-09-09 12:12:12.5' <= timestamp('1996-09-09 12:12:12.4') FROM t +-- !query 221 schema +struct<(1996-09-09 12:12:12.5 <= CAST(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) AS STRING)):boolean> +-- !query 221 output +false + + +-- !query 222 +SELECT '1996-09-09 12:12:12.5' <> timestamp('1996-09-09 12:12:12.4') FROM t +-- !query 222 schema +struct<(NOT (CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) = CAST(1996-09-09 12:12:12.4 AS TIMESTAMP))):boolean> +-- !query 222 output +true + + +-- !query 223 +SELECT cast(null as string) = timestamp('1996-09-09 12:12:12.4') FROM t +-- !query 223 schema +struct<(CAST(CAST(NULL AS STRING) AS TIMESTAMP) = CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> +-- !query 223 output +NULL + + +-- !query 224 +SELECT cast(null as string) > timestamp('1996-09-09 12:12:12.4') FROM t +-- !query 224 schema +struct<(CAST(NULL AS STRING) > CAST(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) AS STRING)):boolean> +-- !query 224 output +NULL + + +-- !query 225 +SELECT cast(null as string) >= timestamp('1996-09-09 12:12:12.4') FROM t +-- !query 225 schema +struct<(CAST(NULL AS STRING) >= CAST(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) AS STRING)):boolean> +-- !query 225 output +NULL + + +-- !query 226 +SELECT cast(null as string) < timestamp('1996-09-09 12:12:12.4') FROM t +-- !query 226 schema +struct<(CAST(NULL AS STRING) < CAST(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) AS STRING)):boolean> +-- !query 226 output +NULL + + +-- !query 227 +SELECT cast(null as string) <= timestamp('1996-09-09 12:12:12.4') FROM t +-- !query 227 schema +struct<(CAST(NULL AS STRING) <= CAST(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) AS STRING)):boolean> +-- !query 227 output +NULL + + +-- !query 228 +SELECT cast(null as string) <> timestamp('1996-09-09 12:12:12.4') FROM t +-- !query 228 schema +struct<(NOT (CAST(CAST(NULL AS STRING) AS TIMESTAMP) = CAST(1996-09-09 12:12:12.4 AS TIMESTAMP))):boolean> +-- !query 228 output +NULL + + +-- !query 229 +SELECT timestamp('1996-09-09 12:12:12.4' )= '1996-09-09 12:12:12.4' FROM t +-- !query 229 schema +struct<(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) = CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> +-- !query 229 output +true + + +-- !query 230 +SELECT timestamp('1996-09-09 12:12:12.5' )> '1996-09-09 12:12:12.4' FROM t +-- !query 230 schema +struct<(CAST(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) AS STRING) > 1996-09-09 12:12:12.4):boolean> +-- !query 230 output +true + + +-- !query 231 +SELECT timestamp('1996-09-09 12:12:12.5' )>= '1996-09-09 12:12:12.4' FROM t +-- !query 231 schema +struct<(CAST(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) AS STRING) >= 1996-09-09 12:12:12.4):boolean> +-- !query 231 output +true + + +-- !query 232 +SELECT timestamp('1996-09-09 12:12:12.5' )< '1996-09-09 12:12:12.4' FROM t +-- !query 232 schema +struct<(CAST(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) AS STRING) < 1996-09-09 12:12:12.4):boolean> +-- !query 232 output +false + + +-- !query 233 +SELECT timestamp('1996-09-09 12:12:12.5' )<= '1996-09-09 12:12:12.4' FROM t +-- !query 233 schema +struct<(CAST(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) AS STRING) <= 1996-09-09 12:12:12.4):boolean> +-- !query 233 output +false + + +-- !query 234 +SELECT timestamp('1996-09-09 12:12:12.5' )<> '1996-09-09 12:12:12.4' FROM t +-- !query 234 schema +struct<(NOT 
(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) = CAST(1996-09-09 12:12:12.4 AS TIMESTAMP))):boolean> +-- !query 234 output +true + + +-- !query 235 +SELECT timestamp('1996-09-09 12:12:12.4' )= cast(null as string) FROM t +-- !query 235 schema +struct<(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) = CAST(CAST(NULL AS STRING) AS TIMESTAMP)):boolean> +-- !query 235 output +NULL + + +-- !query 236 +SELECT timestamp('1996-09-09 12:12:12.5' )> cast(null as string) FROM t +-- !query 236 schema +struct<(CAST(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) AS STRING) > CAST(NULL AS STRING)):boolean> +-- !query 236 output +NULL + + +-- !query 237 +SELECT timestamp('1996-09-09 12:12:12.5' )>= cast(null as string) FROM t +-- !query 237 schema +struct<(CAST(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) AS STRING) >= CAST(NULL AS STRING)):boolean> +-- !query 237 output +NULL + + +-- !query 238 +SELECT timestamp('1996-09-09 12:12:12.5' )< cast(null as string) FROM t +-- !query 238 schema +struct<(CAST(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) AS STRING) < CAST(NULL AS STRING)):boolean> +-- !query 238 output +NULL + + +-- !query 239 +SELECT timestamp('1996-09-09 12:12:12.5' )<= cast(null as string) FROM t +-- !query 239 schema +struct<(CAST(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) AS STRING) <= CAST(NULL AS STRING)):boolean> +-- !query 239 output +NULL + + +-- !query 240 +SELECT timestamp('1996-09-09 12:12:12.5' )<> cast(null as string) FROM t +-- !query 240 schema +struct<(NOT (CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) = CAST(CAST(NULL AS STRING) AS TIMESTAMP))):boolean> +-- !query 240 output +NULL + + +-- !query 241 +SELECT ' ' = X'0020' FROM t +-- !query 241 schema +struct<(CAST( AS BINARY) = X'0020'):boolean> +-- !query 241 output +false + + +-- !query 242 +SELECT ' ' > X'001F' FROM t +-- !query 242 schema +struct<(CAST( AS BINARY) > X'001F'):boolean> +-- !query 242 output +true + + +-- !query 243 +SELECT ' ' >= X'001F' FROM t +-- !query 243 schema +struct<(CAST( AS BINARY) >= X'001F'):boolean> +-- !query 243 output +true + + +-- !query 244 +SELECT ' ' < X'001F' FROM t +-- !query 244 schema +struct<(CAST( AS BINARY) < X'001F'):boolean> +-- !query 244 output +false + + +-- !query 245 +SELECT ' ' <= X'001F' FROM t +-- !query 245 schema +struct<(CAST( AS BINARY) <= X'001F'):boolean> +-- !query 245 output +false + + +-- !query 246 +SELECT ' ' <> X'001F' FROM t +-- !query 246 schema +struct<(NOT (CAST( AS BINARY) = X'001F')):boolean> +-- !query 246 output +true + + +-- !query 247 +SELECT cast(null as string) = X'0020' FROM t +-- !query 247 schema +struct<(CAST(CAST(NULL AS STRING) AS BINARY) = X'0020'):boolean> +-- !query 247 output +NULL + + +-- !query 248 +SELECT cast(null as string) > X'001F' FROM t +-- !query 248 schema +struct<(CAST(CAST(NULL AS STRING) AS BINARY) > X'001F'):boolean> +-- !query 248 output +NULL + + +-- !query 249 +SELECT cast(null as string) >= X'001F' FROM t +-- !query 249 schema +struct<(CAST(CAST(NULL AS STRING) AS BINARY) >= X'001F'):boolean> +-- !query 249 output +NULL + + +-- !query 250 +SELECT cast(null as string) < X'001F' FROM t +-- !query 250 schema +struct<(CAST(CAST(NULL AS STRING) AS BINARY) < X'001F'):boolean> +-- !query 250 output +NULL + + +-- !query 251 +SELECT cast(null as string) <= X'001F' FROM t +-- !query 251 schema +struct<(CAST(CAST(NULL AS STRING) AS BINARY) <= X'001F'):boolean> +-- !query 251 output +NULL + + +-- !query 252 +SELECT cast(null as string) <> X'001F' FROM t +-- !query 252 schema +struct<(NOT (CAST(CAST(NULL AS STRING) AS BINARY) = X'001F')):boolean> +-- !query 252 
output +NULL + + +-- !query 253 +SELECT X'0020' = ' ' FROM t +-- !query 253 schema +struct<(X'0020' = CAST( AS BINARY)):boolean> +-- !query 253 output +false + + +-- !query 254 +SELECT X'001F' > ' ' FROM t +-- !query 254 schema +struct<(X'001F' > CAST( AS BINARY)):boolean> +-- !query 254 output +false + + +-- !query 255 +SELECT X'001F' >= ' ' FROM t +-- !query 255 schema +struct<(X'001F' >= CAST( AS BINARY)):boolean> +-- !query 255 output +false + + +-- !query 256 +SELECT X'001F' < ' ' FROM t +-- !query 256 schema +struct<(X'001F' < CAST( AS BINARY)):boolean> +-- !query 256 output +true + + +-- !query 257 +SELECT X'001F' <= ' ' FROM t +-- !query 257 schema +struct<(X'001F' <= CAST( AS BINARY)):boolean> +-- !query 257 output +true + + +-- !query 258 +SELECT X'001F' <> ' ' FROM t +-- !query 258 schema +struct<(NOT (X'001F' = CAST( AS BINARY))):boolean> +-- !query 258 output +true + + +-- !query 259 +SELECT X'0020' = cast(null as string) FROM t +-- !query 259 schema +struct<(X'0020' = CAST(CAST(NULL AS STRING) AS BINARY)):boolean> +-- !query 259 output +NULL + + +-- !query 260 +SELECT X'001F' > cast(null as string) FROM t +-- !query 260 schema +struct<(X'001F' > CAST(CAST(NULL AS STRING) AS BINARY)):boolean> +-- !query 260 output +NULL + + +-- !query 261 +SELECT X'001F' >= cast(null as string) FROM t +-- !query 261 schema +struct<(X'001F' >= CAST(CAST(NULL AS STRING) AS BINARY)):boolean> +-- !query 261 output +NULL + + +-- !query 262 +SELECT X'001F' < cast(null as string) FROM t +-- !query 262 schema +struct<(X'001F' < CAST(CAST(NULL AS STRING) AS BINARY)):boolean> +-- !query 262 output +NULL + + +-- !query 263 +SELECT X'001F' <= cast(null as string) FROM t +-- !query 263 schema +struct<(X'001F' <= CAST(CAST(NULL AS STRING) AS BINARY)):boolean> +-- !query 263 output +NULL + + +-- !query 264 +SELECT X'001F' <> cast(null as string) FROM t +-- !query 264 schema +struct<(NOT (X'001F' = CAST(CAST(NULL AS STRING) AS BINARY))):boolean> +-- !query 264 output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/booleanEquality.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/booleanEquality.sql.out new file mode 100644 index 0000000000000..46775d79ff4a2 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/booleanEquality.sql.out @@ -0,0 +1,802 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 97 + + +-- !query 0 +CREATE TEMPORARY VIEW t AS SELECT 1 +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +SELECT true = cast(1 as tinyint) FROM t +-- !query 1 schema +struct<(CAST(true AS TINYINT) = CAST(1 AS TINYINT)):boolean> +-- !query 1 output +true + + +-- !query 2 +SELECT true = cast(1 as smallint) FROM t +-- !query 2 schema +struct<(CAST(true AS SMALLINT) = CAST(1 AS SMALLINT)):boolean> +-- !query 2 output +true + + +-- !query 3 +SELECT true = cast(1 as int) FROM t +-- !query 3 schema +struct<(CAST(true AS INT) = CAST(1 AS INT)):boolean> +-- !query 3 output +true + + +-- !query 4 +SELECT true = cast(1 as bigint) FROM t +-- !query 4 schema +struct<(CAST(true AS BIGINT) = CAST(1 AS BIGINT)):boolean> +-- !query 4 output +true + + +-- !query 5 +SELECT true = cast(1 as float) FROM t +-- !query 5 schema +struct<(CAST(true AS FLOAT) = CAST(1 AS FLOAT)):boolean> +-- !query 5 output +true + + +-- !query 6 +SELECT true = cast(1 as double) FROM t +-- !query 6 schema +struct<(CAST(true AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> +-- !query 6 output +true + + +-- !query 7 +SELECT true = 
cast(1 as decimal(10, 0)) FROM t +-- !query 7 schema +struct<(CAST(true AS DECIMAL(10,0)) = CAST(1 AS DECIMAL(10,0))):boolean> +-- !query 7 output +true + + +-- !query 8 +SELECT true = cast(1 as string) FROM t +-- !query 8 schema +struct<(true = CAST(CAST(1 AS STRING) AS BOOLEAN)):boolean> +-- !query 8 output +true + + +-- !query 9 +SELECT true = cast('1' as binary) FROM t +-- !query 9 schema +struct<> +-- !query 9 output +org.apache.spark.sql.AnalysisException +cannot resolve '(true = CAST('1' AS BINARY))' due to data type mismatch: differing types in '(true = CAST('1' AS BINARY))' (boolean and binary).; line 1 pos 7 + + +-- !query 10 +SELECT true = cast(1 as boolean) FROM t +-- !query 10 schema +struct<(true = CAST(1 AS BOOLEAN)):boolean> +-- !query 10 output +true + + +-- !query 11 +SELECT true = cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 11 schema +struct<> +-- !query 11 output +org.apache.spark.sql.AnalysisException +cannot resolve '(true = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(true = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (boolean and timestamp).; line 1 pos 7 + + +-- !query 12 +SELECT true = cast('2017-12-11 09:30:00' as date) FROM t +-- !query 12 schema +struct<> +-- !query 12 output +org.apache.spark.sql.AnalysisException +cannot resolve '(true = CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(true = CAST('2017-12-11 09:30:00' AS DATE))' (boolean and date).; line 1 pos 7 + + +-- !query 13 +SELECT true <=> cast(1 as tinyint) FROM t +-- !query 13 schema +struct<(CAST(true AS TINYINT) <=> CAST(1 AS TINYINT)):boolean> +-- !query 13 output +true + + +-- !query 14 +SELECT true <=> cast(1 as smallint) FROM t +-- !query 14 schema +struct<(CAST(true AS SMALLINT) <=> CAST(1 AS SMALLINT)):boolean> +-- !query 14 output +true + + +-- !query 15 +SELECT true <=> cast(1 as int) FROM t +-- !query 15 schema +struct<(CAST(true AS INT) <=> CAST(1 AS INT)):boolean> +-- !query 15 output +true + + +-- !query 16 +SELECT true <=> cast(1 as bigint) FROM t +-- !query 16 schema +struct<(CAST(true AS BIGINT) <=> CAST(1 AS BIGINT)):boolean> +-- !query 16 output +true + + +-- !query 17 +SELECT true <=> cast(1 as float) FROM t +-- !query 17 schema +struct<(CAST(true AS FLOAT) <=> CAST(1 AS FLOAT)):boolean> +-- !query 17 output +true + + +-- !query 18 +SELECT true <=> cast(1 as double) FROM t +-- !query 18 schema +struct<(CAST(true AS DOUBLE) <=> CAST(1 AS DOUBLE)):boolean> +-- !query 18 output +true + + +-- !query 19 +SELECT true <=> cast(1 as decimal(10, 0)) FROM t +-- !query 19 schema +struct<(CAST(true AS DECIMAL(10,0)) <=> CAST(1 AS DECIMAL(10,0))):boolean> +-- !query 19 output +true + + +-- !query 20 +SELECT true <=> cast(1 as string) FROM t +-- !query 20 schema +struct<(true <=> CAST(CAST(1 AS STRING) AS BOOLEAN)):boolean> +-- !query 20 output +true + + +-- !query 21 +SELECT true <=> cast('1' as binary) FROM t +-- !query 21 schema +struct<> +-- !query 21 output +org.apache.spark.sql.AnalysisException +cannot resolve '(true <=> CAST('1' AS BINARY))' due to data type mismatch: differing types in '(true <=> CAST('1' AS BINARY))' (boolean and binary).; line 1 pos 7 + + +-- !query 22 +SELECT true <=> cast(1 as boolean) FROM t +-- !query 22 schema +struct<(true <=> CAST(1 AS BOOLEAN)):boolean> +-- !query 22 output +true + + +-- !query 23 +SELECT true <=> cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 23 schema +struct<> +-- !query 23 output +org.apache.spark.sql.AnalysisException 
+cannot resolve '(true <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(true <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (boolean and timestamp).; line 1 pos 7 + + +-- !query 24 +SELECT true <=> cast('2017-12-11 09:30:00' as date) FROM t +-- !query 24 schema +struct<> +-- !query 24 output +org.apache.spark.sql.AnalysisException +cannot resolve '(true <=> CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(true <=> CAST('2017-12-11 09:30:00' AS DATE))' (boolean and date).; line 1 pos 7 + + +-- !query 25 +SELECT cast(1 as tinyint) = true FROM t +-- !query 25 schema +struct<(CAST(1 AS TINYINT) = CAST(true AS TINYINT)):boolean> +-- !query 25 output +true + + +-- !query 26 +SELECT cast(1 as smallint) = true FROM t +-- !query 26 schema +struct<(CAST(1 AS SMALLINT) = CAST(true AS SMALLINT)):boolean> +-- !query 26 output +true + + +-- !query 27 +SELECT cast(1 as int) = true FROM t +-- !query 27 schema +struct<(CAST(1 AS INT) = CAST(true AS INT)):boolean> +-- !query 27 output +true + + +-- !query 28 +SELECT cast(1 as bigint) = true FROM t +-- !query 28 schema +struct<(CAST(1 AS BIGINT) = CAST(true AS BIGINT)):boolean> +-- !query 28 output +true + + +-- !query 29 +SELECT cast(1 as float) = true FROM t +-- !query 29 schema +struct<(CAST(1 AS FLOAT) = CAST(true AS FLOAT)):boolean> +-- !query 29 output +true + + +-- !query 30 +SELECT cast(1 as double) = true FROM t +-- !query 30 schema +struct<(CAST(1 AS DOUBLE) = CAST(true AS DOUBLE)):boolean> +-- !query 30 output +true + + +-- !query 31 +SELECT cast(1 as decimal(10, 0)) = true FROM t +-- !query 31 schema +struct<(CAST(1 AS DECIMAL(10,0)) = CAST(true AS DECIMAL(10,0))):boolean> +-- !query 31 output +true + + +-- !query 32 +SELECT cast(1 as string) = true FROM t +-- !query 32 schema +struct<(CAST(CAST(1 AS STRING) AS BOOLEAN) = true):boolean> +-- !query 32 output +true + + +-- !query 33 +SELECT cast('1' as binary) = true FROM t +-- !query 33 schema +struct<> +-- !query 33 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) = true)' due to data type mismatch: differing types in '(CAST('1' AS BINARY) = true)' (binary and boolean).; line 1 pos 7 + + +-- !query 34 +SELECT cast(1 as boolean) = true FROM t +-- !query 34 schema +struct<(CAST(1 AS BOOLEAN) = true):boolean> +-- !query 34 output +true + + +-- !query 35 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) = true FROM t +-- !query 35 schema +struct<> +-- !query 35 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = true)' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = true)' (timestamp and boolean).; line 1 pos 7 + + +-- !query 36 +SELECT cast('2017-12-11 09:30:00' as date) = true FROM t +-- !query 36 schema +struct<> +-- !query 36 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) = true)' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) = true)' (date and boolean).; line 1 pos 7 + + +-- !query 37 +SELECT cast(1 as tinyint) <=> true FROM t +-- !query 37 schema +struct<(CAST(1 AS TINYINT) <=> CAST(true AS TINYINT)):boolean> +-- !query 37 output +true + + +-- !query 38 +SELECT cast(1 as smallint) <=> true FROM t +-- !query 38 schema +struct<(CAST(1 AS SMALLINT) <=> CAST(true AS SMALLINT)):boolean> +-- !query 38 output +true + + +-- !query 39 +SELECT cast(1 as int) <=> true FROM t +-- !query 39 
schema +struct<(CAST(1 AS INT) <=> CAST(true AS INT)):boolean> +-- !query 39 output +true + + +-- !query 40 +SELECT cast(1 as bigint) <=> true FROM t +-- !query 40 schema +struct<(CAST(1 AS BIGINT) <=> CAST(true AS BIGINT)):boolean> +-- !query 40 output +true + + +-- !query 41 +SELECT cast(1 as float) <=> true FROM t +-- !query 41 schema +struct<(CAST(1 AS FLOAT) <=> CAST(true AS FLOAT)):boolean> +-- !query 41 output +true + + +-- !query 42 +SELECT cast(1 as double) <=> true FROM t +-- !query 42 schema +struct<(CAST(1 AS DOUBLE) <=> CAST(true AS DOUBLE)):boolean> +-- !query 42 output +true + + +-- !query 43 +SELECT cast(1 as decimal(10, 0)) <=> true FROM t +-- !query 43 schema +struct<(CAST(1 AS DECIMAL(10,0)) <=> CAST(true AS DECIMAL(10,0))):boolean> +-- !query 43 output +true + + +-- !query 44 +SELECT cast(1 as string) <=> true FROM t +-- !query 44 schema +struct<(CAST(CAST(1 AS STRING) AS BOOLEAN) <=> true):boolean> +-- !query 44 output +true + + +-- !query 45 +SELECT cast('1' as binary) <=> true FROM t +-- !query 45 schema +struct<> +-- !query 45 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) <=> true)' due to data type mismatch: differing types in '(CAST('1' AS BINARY) <=> true)' (binary and boolean).; line 1 pos 7 + + +-- !query 46 +SELECT cast(1 as boolean) <=> true FROM t +-- !query 46 schema +struct<(CAST(1 AS BOOLEAN) <=> true):boolean> +-- !query 46 output +true + + +-- !query 47 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <=> true FROM t +-- !query 47 schema +struct<> +-- !query 47 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> true)' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> true)' (timestamp and boolean).; line 1 pos 7 + + +-- !query 48 +SELECT cast('2017-12-11 09:30:00' as date) <=> true FROM t +-- !query 48 schema +struct<> +-- !query 48 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) <=> true)' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) <=> true)' (date and boolean).; line 1 pos 7 + + +-- !query 49 +SELECT false = cast(0 as tinyint) FROM t +-- !query 49 schema +struct<(CAST(false AS TINYINT) = CAST(0 AS TINYINT)):boolean> +-- !query 49 output +true + + +-- !query 50 +SELECT false = cast(0 as smallint) FROM t +-- !query 50 schema +struct<(CAST(false AS SMALLINT) = CAST(0 AS SMALLINT)):boolean> +-- !query 50 output +true + + +-- !query 51 +SELECT false = cast(0 as int) FROM t +-- !query 51 schema +struct<(CAST(false AS INT) = CAST(0 AS INT)):boolean> +-- !query 51 output +true + + +-- !query 52 +SELECT false = cast(0 as bigint) FROM t +-- !query 52 schema +struct<(CAST(false AS BIGINT) = CAST(0 AS BIGINT)):boolean> +-- !query 52 output +true + + +-- !query 53 +SELECT false = cast(0 as float) FROM t +-- !query 53 schema +struct<(CAST(false AS FLOAT) = CAST(0 AS FLOAT)):boolean> +-- !query 53 output +true + + +-- !query 54 +SELECT false = cast(0 as double) FROM t +-- !query 54 schema +struct<(CAST(false AS DOUBLE) = CAST(0 AS DOUBLE)):boolean> +-- !query 54 output +true + + +-- !query 55 +SELECT false = cast(0 as decimal(10, 0)) FROM t +-- !query 55 schema +struct<(CAST(false AS DECIMAL(10,0)) = CAST(0 AS DECIMAL(10,0))):boolean> +-- !query 55 output +true + + +-- !query 56 +SELECT false = cast(0 as string) FROM t +-- !query 56 schema +struct<(false = CAST(CAST(0 AS STRING) AS BOOLEAN)):boolean> +-- !query 56 output +true + 
+ +-- !query 57 +SELECT false = cast('0' as binary) FROM t +-- !query 57 schema +struct<> +-- !query 57 output +org.apache.spark.sql.AnalysisException +cannot resolve '(false = CAST('0' AS BINARY))' due to data type mismatch: differing types in '(false = CAST('0' AS BINARY))' (boolean and binary).; line 1 pos 7 + + +-- !query 58 +SELECT false = cast(0 as boolean) FROM t +-- !query 58 schema +struct<(false = CAST(0 AS BOOLEAN)):boolean> +-- !query 58 output +true + + +-- !query 59 +SELECT false = cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 59 schema +struct<> +-- !query 59 output +org.apache.spark.sql.AnalysisException +cannot resolve '(false = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(false = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (boolean and timestamp).; line 1 pos 7 + + +-- !query 60 +SELECT false = cast('2017-12-11 09:30:00' as date) FROM t +-- !query 60 schema +struct<> +-- !query 60 output +org.apache.spark.sql.AnalysisException +cannot resolve '(false = CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(false = CAST('2017-12-11 09:30:00' AS DATE))' (boolean and date).; line 1 pos 7 + + +-- !query 61 +SELECT false <=> cast(0 as tinyint) FROM t +-- !query 61 schema +struct<(CAST(false AS TINYINT) <=> CAST(0 AS TINYINT)):boolean> +-- !query 61 output +true + + +-- !query 62 +SELECT false <=> cast(0 as smallint) FROM t +-- !query 62 schema +struct<(CAST(false AS SMALLINT) <=> CAST(0 AS SMALLINT)):boolean> +-- !query 62 output +true + + +-- !query 63 +SELECT false <=> cast(0 as int) FROM t +-- !query 63 schema +struct<(CAST(false AS INT) <=> CAST(0 AS INT)):boolean> +-- !query 63 output +true + + +-- !query 64 +SELECT false <=> cast(0 as bigint) FROM t +-- !query 64 schema +struct<(CAST(false AS BIGINT) <=> CAST(0 AS BIGINT)):boolean> +-- !query 64 output +true + + +-- !query 65 +SELECT false <=> cast(0 as float) FROM t +-- !query 65 schema +struct<(CAST(false AS FLOAT) <=> CAST(0 AS FLOAT)):boolean> +-- !query 65 output +true + + +-- !query 66 +SELECT false <=> cast(0 as double) FROM t +-- !query 66 schema +struct<(CAST(false AS DOUBLE) <=> CAST(0 AS DOUBLE)):boolean> +-- !query 66 output +true + + +-- !query 67 +SELECT false <=> cast(0 as decimal(10, 0)) FROM t +-- !query 67 schema +struct<(CAST(false AS DECIMAL(10,0)) <=> CAST(0 AS DECIMAL(10,0))):boolean> +-- !query 67 output +true + + +-- !query 68 +SELECT false <=> cast(0 as string) FROM t +-- !query 68 schema +struct<(false <=> CAST(CAST(0 AS STRING) AS BOOLEAN)):boolean> +-- !query 68 output +true + + +-- !query 69 +SELECT false <=> cast('0' as binary) FROM t +-- !query 69 schema +struct<> +-- !query 69 output +org.apache.spark.sql.AnalysisException +cannot resolve '(false <=> CAST('0' AS BINARY))' due to data type mismatch: differing types in '(false <=> CAST('0' AS BINARY))' (boolean and binary).; line 1 pos 7 + + +-- !query 70 +SELECT false <=> cast(0 as boolean) FROM t +-- !query 70 schema +struct<(false <=> CAST(0 AS BOOLEAN)):boolean> +-- !query 70 output +true + + +-- !query 71 +SELECT false <=> cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 71 schema +struct<> +-- !query 71 output +org.apache.spark.sql.AnalysisException +cannot resolve '(false <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(false <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (boolean and timestamp).; line 1 pos 7 + + +-- !query 72 +SELECT false <=> cast('2017-12-11 09:30:00' as date) 
FROM t +-- !query 72 schema +struct<> +-- !query 72 output +org.apache.spark.sql.AnalysisException +cannot resolve '(false <=> CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(false <=> CAST('2017-12-11 09:30:00' AS DATE))' (boolean and date).; line 1 pos 7 + + +-- !query 73 +SELECT cast(0 as tinyint) = false FROM t +-- !query 73 schema +struct<(CAST(0 AS TINYINT) = CAST(false AS TINYINT)):boolean> +-- !query 73 output +true + + +-- !query 74 +SELECT cast(0 as smallint) = false FROM t +-- !query 74 schema +struct<(CAST(0 AS SMALLINT) = CAST(false AS SMALLINT)):boolean> +-- !query 74 output +true + + +-- !query 75 +SELECT cast(0 as int) = false FROM t +-- !query 75 schema +struct<(CAST(0 AS INT) = CAST(false AS INT)):boolean> +-- !query 75 output +true + + +-- !query 76 +SELECT cast(0 as bigint) = false FROM t +-- !query 76 schema +struct<(CAST(0 AS BIGINT) = CAST(false AS BIGINT)):boolean> +-- !query 76 output +true + + +-- !query 77 +SELECT cast(0 as float) = false FROM t +-- !query 77 schema +struct<(CAST(0 AS FLOAT) = CAST(false AS FLOAT)):boolean> +-- !query 77 output +true + + +-- !query 78 +SELECT cast(0 as double) = false FROM t +-- !query 78 schema +struct<(CAST(0 AS DOUBLE) = CAST(false AS DOUBLE)):boolean> +-- !query 78 output +true + + +-- !query 79 +SELECT cast(0 as decimal(10, 0)) = false FROM t +-- !query 79 schema +struct<(CAST(0 AS DECIMAL(10,0)) = CAST(false AS DECIMAL(10,0))):boolean> +-- !query 79 output +true + + +-- !query 80 +SELECT cast(0 as string) = false FROM t +-- !query 80 schema +struct<(CAST(CAST(0 AS STRING) AS BOOLEAN) = false):boolean> +-- !query 80 output +true + + +-- !query 81 +SELECT cast('0' as binary) = false FROM t +-- !query 81 schema +struct<> +-- !query 81 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('0' AS BINARY) = false)' due to data type mismatch: differing types in '(CAST('0' AS BINARY) = false)' (binary and boolean).; line 1 pos 7 + + +-- !query 82 +SELECT cast(0 as boolean) = false FROM t +-- !query 82 schema +struct<(CAST(0 AS BOOLEAN) = false):boolean> +-- !query 82 output +true + + +-- !query 83 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) = false FROM t +-- !query 83 schema +struct<> +-- !query 83 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = false)' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = false)' (timestamp and boolean).; line 1 pos 7 + + +-- !query 84 +SELECT cast('2017-12-11 09:30:00' as date) = false FROM t +-- !query 84 schema +struct<> +-- !query 84 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) = false)' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) = false)' (date and boolean).; line 1 pos 7 + + +-- !query 85 +SELECT cast(0 as tinyint) <=> false FROM t +-- !query 85 schema +struct<(CAST(0 AS TINYINT) <=> CAST(false AS TINYINT)):boolean> +-- !query 85 output +true + + +-- !query 86 +SELECT cast(0 as smallint) <=> false FROM t +-- !query 86 schema +struct<(CAST(0 AS SMALLINT) <=> CAST(false AS SMALLINT)):boolean> +-- !query 86 output +true + + +-- !query 87 +SELECT cast(0 as int) <=> false FROM t +-- !query 87 schema +struct<(CAST(0 AS INT) <=> CAST(false AS INT)):boolean> +-- !query 87 output +true + + +-- !query 88 +SELECT cast(0 as bigint) <=> false FROM t +-- !query 88 schema +struct<(CAST(0 AS BIGINT) <=> CAST(false AS BIGINT)):boolean> +-- !query 88 
output +true + + +-- !query 89 +SELECT cast(0 as float) <=> false FROM t +-- !query 89 schema +struct<(CAST(0 AS FLOAT) <=> CAST(false AS FLOAT)):boolean> +-- !query 89 output +true + + +-- !query 90 +SELECT cast(0 as double) <=> false FROM t +-- !query 90 schema +struct<(CAST(0 AS DOUBLE) <=> CAST(false AS DOUBLE)):boolean> +-- !query 90 output +true + + +-- !query 91 +SELECT cast(0 as decimal(10, 0)) <=> false FROM t +-- !query 91 schema +struct<(CAST(0 AS DECIMAL(10,0)) <=> CAST(false AS DECIMAL(10,0))):boolean> +-- !query 91 output +true + + +-- !query 92 +SELECT cast(0 as string) <=> false FROM t +-- !query 92 schema +struct<(CAST(CAST(0 AS STRING) AS BOOLEAN) <=> false):boolean> +-- !query 92 output +true + + +-- !query 93 +SELECT cast('0' as binary) <=> false FROM t +-- !query 93 schema +struct<> +-- !query 93 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('0' AS BINARY) <=> false)' due to data type mismatch: differing types in '(CAST('0' AS BINARY) <=> false)' (binary and boolean).; line 1 pos 7 + + +-- !query 94 +SELECT cast(0 as boolean) <=> false FROM t +-- !query 94 schema +struct<(CAST(0 AS BOOLEAN) <=> false):boolean> +-- !query 94 output +true + + +-- !query 95 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <=> false FROM t +-- !query 95 schema +struct<> +-- !query 95 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> false)' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> false)' (timestamp and boolean).; line 1 pos 7 + + +-- !query 96 +SELECT cast('2017-12-11 09:30:00' as date) <=> false FROM t +-- !query 96 schema +struct<> +-- !query 96 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) <=> false)' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) <=> false)' (date and boolean).; line 1 pos 7 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/caseWhenCoercion.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/caseWhenCoercion.sql.out new file mode 100644 index 0000000000000..a739f8d73181c --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/caseWhenCoercion.sql.out @@ -0,0 +1,1232 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 145 + + +-- !query 0 +CREATE TEMPORARY VIEW t AS SELECT 1 +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as tinyint) END FROM t +-- !query 1 schema +struct +-- !query 1 output +1 + + +-- !query 2 +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as smallint) END FROM t +-- !query 2 schema +struct +-- !query 2 output +1 + + +-- !query 3 +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as int) END FROM t +-- !query 3 schema +struct +-- !query 3 output +1 + + +-- !query 4 +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as bigint) END FROM t +-- !query 4 schema +struct +-- !query 4 output +1 + + +-- !query 5 +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as float) END FROM t +-- !query 5 schema +struct +-- !query 5 output +1.0 + + +-- !query 6 +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as double) END FROM t +-- !query 6 schema +struct +-- !query 6 output +1.0 + + +-- !query 7 +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as decimal(10, 0)) END FROM t +-- !query 7 schema 
+struct +-- !query 7 output +1 + + +-- !query 8 +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as string) END FROM t +-- !query 8 schema +struct +-- !query 8 output +1 + + +-- !query 9 +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast('2' as binary) END FROM t +-- !query 9 schema +struct<> +-- !query 9 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS TINYINT) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 10 +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as boolean) END FROM t +-- !query 10 schema +struct<> +-- !query 10 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS TINYINT) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 11 +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t +-- !query 11 schema +struct<> +-- !query 11 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS TINYINT) ELSE CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 12 +SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast('2017-12-11 09:30:00' as date) END FROM t +-- !query 12 schema +struct<> +-- !query 12 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS TINYINT) ELSE CAST('2017-12-11 09:30:00' AS DATE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 13 +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as tinyint) END FROM t +-- !query 13 schema +struct +-- !query 13 output +1 + + +-- !query 14 +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as smallint) END FROM t +-- !query 14 schema +struct +-- !query 14 output +1 + + +-- !query 15 +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as int) END FROM t +-- !query 15 schema +struct +-- !query 15 output +1 + + +-- !query 16 +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as bigint) END FROM t +-- !query 16 schema +struct +-- !query 16 output +1 + + +-- !query 17 +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as float) END FROM t +-- !query 17 schema +struct +-- !query 17 output +1.0 + + +-- !query 18 +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as double) END FROM t +-- !query 18 schema +struct +-- !query 18 output +1.0 + + +-- !query 19 +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as decimal(10, 0)) END FROM t +-- !query 19 schema +struct +-- !query 19 output +1 + + +-- !query 20 +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as string) END FROM t +-- !query 20 schema +struct +-- !query 20 output +1 + + +-- !query 21 +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast('2' as binary) END FROM t +-- !query 21 schema +struct<> +-- !query 21 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS SMALLINT) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 22 +SELECT CASE WHEN true THEN cast(1 as 
smallint) ELSE cast(2 as boolean) END FROM t +-- !query 22 schema +struct<> +-- !query 22 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS SMALLINT) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 23 +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t +-- !query 23 schema +struct<> +-- !query 23 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS SMALLINT) ELSE CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 24 +SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast('2017-12-11 09:30:00' as date) END FROM t +-- !query 24 schema +struct<> +-- !query 24 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS SMALLINT) ELSE CAST('2017-12-11 09:30:00' AS DATE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 25 +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as tinyint) END FROM t +-- !query 25 schema +struct +-- !query 25 output +1 + + +-- !query 26 +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as smallint) END FROM t +-- !query 26 schema +struct +-- !query 26 output +1 + + +-- !query 27 +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as int) END FROM t +-- !query 27 schema +struct +-- !query 27 output +1 + + +-- !query 28 +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as bigint) END FROM t +-- !query 28 schema +struct +-- !query 28 output +1 + + +-- !query 29 +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as float) END FROM t +-- !query 29 schema +struct +-- !query 29 output +1.0 + + +-- !query 30 +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as double) END FROM t +-- !query 30 schema +struct +-- !query 30 output +1.0 + + +-- !query 31 +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as decimal(10, 0)) END FROM t +-- !query 31 schema +struct +-- !query 31 output +1 + + +-- !query 32 +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as string) END FROM t +-- !query 32 schema +struct +-- !query 32 output +1 + + +-- !query 33 +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast('2' as binary) END FROM t +-- !query 33 schema +struct<> +-- !query 33 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS INT) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 34 +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as boolean) END FROM t +-- !query 34 schema +struct<> +-- !query 34 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS INT) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 35 +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t +-- !query 35 schema +struct<> +-- !query 35 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS INT) ELSE CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) END' due to data type mismatch: THEN 
and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 36 +SELECT CASE WHEN true THEN cast(1 as int) ELSE cast('2017-12-11 09:30:00' as date) END FROM t +-- !query 36 schema +struct<> +-- !query 36 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS INT) ELSE CAST('2017-12-11 09:30:00' AS DATE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 37 +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as tinyint) END FROM t +-- !query 37 schema +struct +-- !query 37 output +1 + + +-- !query 38 +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as smallint) END FROM t +-- !query 38 schema +struct +-- !query 38 output +1 + + +-- !query 39 +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as int) END FROM t +-- !query 39 schema +struct +-- !query 39 output +1 + + +-- !query 40 +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as bigint) END FROM t +-- !query 40 schema +struct +-- !query 40 output +1 + + +-- !query 41 +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as float) END FROM t +-- !query 41 schema +struct +-- !query 41 output +1.0 + + +-- !query 42 +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as double) END FROM t +-- !query 42 schema +struct +-- !query 42 output +1.0 + + +-- !query 43 +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as decimal(10, 0)) END FROM t +-- !query 43 schema +struct +-- !query 43 output +1 + + +-- !query 44 +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as string) END FROM t +-- !query 44 schema +struct +-- !query 44 output +1 + + +-- !query 45 +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast('2' as binary) END FROM t +-- !query 45 schema +struct<> +-- !query 45 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS BIGINT) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 46 +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as boolean) END FROM t +-- !query 46 schema +struct<> +-- !query 46 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS BIGINT) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 47 +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t +-- !query 47 schema +struct<> +-- !query 47 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS BIGINT) ELSE CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 48 +SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast('2017-12-11 09:30:00' as date) END FROM t +-- !query 48 schema +struct<> +-- !query 48 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS BIGINT) ELSE CAST('2017-12-11 09:30:00' AS DATE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 49 +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as tinyint) END FROM t +-- !query 49 schema +struct +-- !query 49 output +1.0 + 
+ +-- !query 50 +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as smallint) END FROM t +-- !query 50 schema +struct +-- !query 50 output +1.0 + + +-- !query 51 +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as int) END FROM t +-- !query 51 schema +struct +-- !query 51 output +1.0 + + +-- !query 52 +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as bigint) END FROM t +-- !query 52 schema +struct +-- !query 52 output +1.0 + + +-- !query 53 +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as float) END FROM t +-- !query 53 schema +struct +-- !query 53 output +1.0 + + +-- !query 54 +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as double) END FROM t +-- !query 54 schema +struct +-- !query 54 output +1.0 + + +-- !query 55 +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as decimal(10, 0)) END FROM t +-- !query 55 schema +struct +-- !query 55 output +1.0 + + +-- !query 56 +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as string) END FROM t +-- !query 56 schema +struct +-- !query 56 output +1.0 + + +-- !query 57 +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast('2' as binary) END FROM t +-- !query 57 schema +struct<> +-- !query 57 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS FLOAT) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 58 +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as boolean) END FROM t +-- !query 58 schema +struct<> +-- !query 58 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS FLOAT) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 59 +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t +-- !query 59 schema +struct<> +-- !query 59 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS FLOAT) ELSE CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 60 +SELECT CASE WHEN true THEN cast(1 as float) ELSE cast('2017-12-11 09:30:00' as date) END FROM t +-- !query 60 schema +struct<> +-- !query 60 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS FLOAT) ELSE CAST('2017-12-11 09:30:00' AS DATE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 61 +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as tinyint) END FROM t +-- !query 61 schema +struct +-- !query 61 output +1.0 + + +-- !query 62 +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as smallint) END FROM t +-- !query 62 schema +struct +-- !query 62 output +1.0 + + +-- !query 63 +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as int) END FROM t +-- !query 63 schema +struct +-- !query 63 output +1.0 + + +-- !query 64 +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as bigint) END FROM t +-- !query 64 schema +struct +-- !query 64 output +1.0 + + +-- !query 65 +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as float) END FROM t +-- !query 65 schema +struct +-- !query 65 output +1.0 + + +-- !query 66 +SELECT CASE WHEN true THEN cast(1 as 
double) ELSE cast(2 as double) END FROM t +-- !query 66 schema +struct +-- !query 66 output +1.0 + + +-- !query 67 +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as decimal(10, 0)) END FROM t +-- !query 67 schema +struct +-- !query 67 output +1.0 + + +-- !query 68 +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as string) END FROM t +-- !query 68 schema +struct +-- !query 68 output +1.0 + + +-- !query 69 +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast('2' as binary) END FROM t +-- !query 69 schema +struct<> +-- !query 69 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS DOUBLE) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 70 +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as boolean) END FROM t +-- !query 70 schema +struct<> +-- !query 70 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS DOUBLE) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 71 +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t +-- !query 71 schema +struct<> +-- !query 71 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS DOUBLE) ELSE CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 72 +SELECT CASE WHEN true THEN cast(1 as double) ELSE cast('2017-12-11 09:30:00' as date) END FROM t +-- !query 72 schema +struct<> +-- !query 72 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS DOUBLE) ELSE CAST('2017-12-11 09:30:00' AS DATE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 73 +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as tinyint) END FROM t +-- !query 73 schema +struct +-- !query 73 output +1 + + +-- !query 74 +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as smallint) END FROM t +-- !query 74 schema +struct +-- !query 74 output +1 + + +-- !query 75 +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as int) END FROM t +-- !query 75 schema +struct +-- !query 75 output +1 + + +-- !query 76 +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as bigint) END FROM t +-- !query 76 schema +struct +-- !query 76 output +1 + + +-- !query 77 +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as float) END FROM t +-- !query 77 schema +struct +-- !query 77 output +1.0 + + +-- !query 78 +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as double) END FROM t +-- !query 78 schema +struct +-- !query 78 output +1.0 + + +-- !query 79 +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as decimal(10, 0)) END FROM t +-- !query 79 schema +struct +-- !query 79 output +1 + + +-- !query 80 +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as string) END FROM t +-- !query 80 schema +struct +-- !query 80 output +1 + + +-- !query 81 +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast('2' as binary) END FROM t +-- !query 81 schema +struct<> +-- !query 81 output 
+org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS DECIMAL(10,0)) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 82 +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as boolean) END FROM t +-- !query 82 schema +struct<> +-- !query 82 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS DECIMAL(10,0)) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 83 +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t +-- !query 83 schema +struct<> +-- !query 83 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS DECIMAL(10,0)) ELSE CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 84 +SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast('2017-12-11 09:30:00' as date) END FROM t +-- !query 84 schema +struct<> +-- !query 84 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS DECIMAL(10,0)) ELSE CAST('2017-12-11 09:30:00' AS DATE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 85 +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as tinyint) END FROM t +-- !query 85 schema +struct +-- !query 85 output +1 + + +-- !query 86 +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as smallint) END FROM t +-- !query 86 schema +struct +-- !query 86 output +1 + + +-- !query 87 +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as int) END FROM t +-- !query 87 schema +struct +-- !query 87 output +1 + + +-- !query 88 +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as bigint) END FROM t +-- !query 88 schema +struct +-- !query 88 output +1 + + +-- !query 89 +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as float) END FROM t +-- !query 89 schema +struct +-- !query 89 output +1 + + +-- !query 90 +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as double) END FROM t +-- !query 90 schema +struct +-- !query 90 output +1 + + +-- !query 91 +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as decimal(10, 0)) END FROM t +-- !query 91 schema +struct +-- !query 91 output +1 + + +-- !query 92 +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as string) END FROM t +-- !query 92 schema +struct +-- !query 92 output +1 + + +-- !query 93 +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast('2' as binary) END FROM t +-- !query 93 schema +struct<> +-- !query 93 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS STRING) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 94 +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as boolean) END FROM t +-- !query 94 schema +struct<> +-- !query 94 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS STRING) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a 
common type; line 1 pos 7 + + +-- !query 95 +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t +-- !query 95 schema +struct +-- !query 95 output +1 + + +-- !query 96 +SELECT CASE WHEN true THEN cast(1 as string) ELSE cast('2017-12-11 09:30:00' as date) END FROM t +-- !query 96 schema +struct +-- !query 96 output +1 + + +-- !query 97 +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as tinyint) END FROM t +-- !query 97 schema +struct<> +-- !query 97 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST(2 AS TINYINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 98 +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as smallint) END FROM t +-- !query 98 schema +struct<> +-- !query 98 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST(2 AS SMALLINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 99 +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as int) END FROM t +-- !query 99 schema +struct<> +-- !query 99 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST(2 AS INT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 100 +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as bigint) END FROM t +-- !query 100 schema +struct<> +-- !query 100 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST(2 AS BIGINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 101 +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as float) END FROM t +-- !query 101 schema +struct<> +-- !query 101 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST(2 AS FLOAT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 102 +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as double) END FROM t +-- !query 102 schema +struct<> +-- !query 102 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST(2 AS DOUBLE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 103 +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as decimal(10, 0)) END FROM t +-- !query 103 schema +struct<> +-- !query 103 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST(2 AS DECIMAL(10,0)) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 104 +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as string) END FROM t +-- !query 104 schema +struct<> +-- !query 104 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST(2 AS STRING) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to 
a common type; line 1 pos 7 + + +-- !query 105 +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast('2' as binary) END FROM t +-- !query 105 schema +struct +-- !query 105 output +1 + + +-- !query 106 +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as boolean) END FROM t +-- !query 106 schema +struct<> +-- !query 106 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 107 +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t +-- !query 107 schema +struct<> +-- !query 107 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 108 +SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast('2017-12-11 09:30:00' as date) END FROM t +-- !query 108 schema +struct<> +-- !query 108 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST('2017-12-11 09:30:00' AS DATE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 109 +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as tinyint) END FROM t +-- !query 109 schema +struct<> +-- !query 109 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST(2 AS TINYINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 110 +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as smallint) END FROM t +-- !query 110 schema +struct<> +-- !query 110 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST(2 AS SMALLINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 111 +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as int) END FROM t +-- !query 111 schema +struct<> +-- !query 111 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST(2 AS INT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 112 +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as bigint) END FROM t +-- !query 112 schema +struct<> +-- !query 112 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST(2 AS BIGINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 113 +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as float) END FROM t +-- !query 113 schema +struct<> +-- !query 113 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST(2 AS FLOAT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 114 +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as 
double) END FROM t +-- !query 114 schema +struct<> +-- !query 114 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST(2 AS DOUBLE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 115 +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as decimal(10, 0)) END FROM t +-- !query 115 schema +struct<> +-- !query 115 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST(2 AS DECIMAL(10,0)) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 116 +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as string) END FROM t +-- !query 116 schema +struct<> +-- !query 116 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST(2 AS STRING) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 117 +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast('2' as binary) END FROM t +-- !query 117 schema +struct<> +-- !query 117 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 118 +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as boolean) END FROM t +-- !query 118 schema +struct +-- !query 118 output +true + + +-- !query 119 +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t +-- !query 119 schema +struct<> +-- !query 119 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 120 +SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast('2017-12-11 09:30:00' as date) END FROM t +-- !query 120 schema +struct<> +-- !query 120 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST('2017-12-11 09:30:00' AS DATE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 121 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as tinyint) END FROM t +-- !query 121 schema +struct<> +-- !query 121 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) ELSE CAST(2 AS TINYINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 122 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as smallint) END FROM t +-- !query 122 schema +struct<> +-- !query 122 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) ELSE CAST(2 AS SMALLINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 123 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' 
as timestamp) ELSE cast(2 as int) END FROM t +-- !query 123 schema +struct<> +-- !query 123 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) ELSE CAST(2 AS INT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 124 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as bigint) END FROM t +-- !query 124 schema +struct<> +-- !query 124 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) ELSE CAST(2 AS BIGINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 125 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as float) END FROM t +-- !query 125 schema +struct<> +-- !query 125 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) ELSE CAST(2 AS FLOAT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 126 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as double) END FROM t +-- !query 126 schema +struct<> +-- !query 126 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) ELSE CAST(2 AS DOUBLE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 127 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as decimal(10, 0)) END FROM t +-- !query 127 schema +struct<> +-- !query 127 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) ELSE CAST(2 AS DECIMAL(10,0)) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 128 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as string) END FROM t +-- !query 128 schema +struct +-- !query 128 output +2017-12-12 09:30:00 + + +-- !query 129 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast('2' as binary) END FROM t +-- !query 129 schema +struct<> +-- !query 129 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 130 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as boolean) END FROM t +-- !query 130 schema +struct<> +-- !query 130 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 131 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t +-- !query 131 schema +struct +-- !query 131 output +2017-12-12 09:30:00 + + +-- !query 132 +SELECT CASE WHEN true THEN cast('2017-12-12 
09:30:00.0' as timestamp) ELSE cast('2017-12-11 09:30:00' as date) END FROM t +-- !query 132 schema +struct +-- !query 132 output +2017-12-12 09:30:00 + + +-- !query 133 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as tinyint) END FROM t +-- !query 133 schema +struct<> +-- !query 133 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00' AS DATE) ELSE CAST(2 AS TINYINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 134 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as smallint) END FROM t +-- !query 134 schema +struct<> +-- !query 134 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00' AS DATE) ELSE CAST(2 AS SMALLINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 135 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as int) END FROM t +-- !query 135 schema +struct<> +-- !query 135 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00' AS DATE) ELSE CAST(2 AS INT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 136 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as bigint) END FROM t +-- !query 136 schema +struct<> +-- !query 136 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00' AS DATE) ELSE CAST(2 AS BIGINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 137 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as float) END FROM t +-- !query 137 schema +struct<> +-- !query 137 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00' AS DATE) ELSE CAST(2 AS FLOAT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 138 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as double) END FROM t +-- !query 138 schema +struct<> +-- !query 138 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00' AS DATE) ELSE CAST(2 AS DOUBLE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 139 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as decimal(10, 0)) END FROM t +-- !query 139 schema +struct<> +-- !query 139 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00' AS DATE) ELSE CAST(2 AS DECIMAL(10,0)) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 140 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as string) END FROM t +-- !query 140 schema +struct +-- !query 140 output +2017-12-12 + + +-- !query 141 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast('2' as binary) END FROM t +-- !query 141 schema +struct<> +-- !query 141 output 
+org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00' AS DATE) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 142 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as boolean) END FROM t +-- !query 142 schema +struct<> +-- !query 142 output +org.apache.spark.sql.AnalysisException +cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00' AS DATE) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type; line 1 pos 7 + + +-- !query 143 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t +-- !query 143 schema +struct +-- !query 143 output +2017-12-12 00:00:00 + + +-- !query 144 +SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast('2017-12-11 09:30:00' as date) END FROM t +-- !query 144 schema +struct +-- !query 144 output +2017-12-12 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/concat.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/concat.sql.out new file mode 100644 index 0000000000000..09729fdc2ec32 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/concat.sql.out @@ -0,0 +1,239 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 11 + + +-- !query 0 +SELECT (col1 || col2 || col3) col +FROM ( + SELECT + id col1, + string(id + 1) col2, + encode(string(id + 2), 'utf-8') col3 + FROM range(10) +) +-- !query 0 schema +struct +-- !query 0 output +012 +123 +234 +345 +456 +567 +678 +789 +8910 +91011 + + +-- !query 1 +SELECT ((col1 || col2) || (col3 || col4) || col5) col +FROM ( + SELECT + 'prefix_' col1, + id col2, + string(id + 1) col3, + encode(string(id + 2), 'utf-8') col4, + CAST(id AS DOUBLE) col5 + FROM range(10) +) +-- !query 1 schema +struct +-- !query 1 output +prefix_0120.0 +prefix_1231.0 +prefix_2342.0 +prefix_3453.0 +prefix_4564.0 +prefix_5675.0 +prefix_6786.0 +prefix_7897.0 +prefix_89108.0 +prefix_910119.0 + + +-- !query 2 +SELECT ((col1 || col2) || (col3 || col4)) col +FROM ( + SELECT + string(id) col1, + string(id + 1) col2, + encode(string(id + 2), 'utf-8') col3, + encode(string(id + 3), 'utf-8') col4 + FROM range(10) +) +-- !query 2 schema +struct +-- !query 2 output +0123 +1234 +2345 +3456 +4567 +5678 +6789 +78910 +891011 +9101112 + + +-- !query 3 +set spark.sql.function.concatBinaryAsString=true +-- !query 3 schema +struct +-- !query 3 output +spark.sql.function.concatBinaryAsString true + + +-- !query 4 +SELECT (col1 || col2) col +FROM ( + SELECT + encode(string(id), 'utf-8') col1, + encode(string(id + 1), 'utf-8') col2 + FROM range(10) +) +-- !query 4 schema +struct +-- !query 4 output +01 +12 +23 +34 +45 +56 +67 +78 +89 +910 + + +-- !query 5 +SELECT (col1 || col2 || col3 || col4) col +FROM ( + SELECT + encode(string(id), 'utf-8') col1, + encode(string(id + 1), 'utf-8') col2, + encode(string(id + 2), 'utf-8') col3, + encode(string(id + 3), 'utf-8') col4 + FROM range(10) +) +-- !query 5 schema +struct +-- !query 5 output +0123 +1234 +2345 +3456 +4567 +5678 +6789 +78910 +891011 +9101112 + + +-- !query 6 +SELECT ((col1 || col2) || (col3 || col4)) col +FROM ( + SELECT + encode(string(id), 'utf-8') col1, + encode(string(id + 1), 'utf-8') col2, + encode(string(id + 2), 'utf-8') col3, + encode(string(id 
+ 3), 'utf-8') col4 + FROM range(10) +) +-- !query 6 schema +struct +-- !query 6 output +0123 +1234 +2345 +3456 +4567 +5678 +6789 +78910 +891011 +9101112 + + +-- !query 7 +set spark.sql.function.concatBinaryAsString=false +-- !query 7 schema +struct +-- !query 7 output +spark.sql.function.concatBinaryAsString false + + +-- !query 8 +SELECT (col1 || col2) col +FROM ( + SELECT + encode(string(id), 'utf-8') col1, + encode(string(id + 1), 'utf-8') col2 + FROM range(10) +) +-- !query 8 schema +struct +-- !query 8 output +01 +12 +23 +34 +45 +56 +67 +78 +89 +910 + + +-- !query 9 +SELECT (col1 || col2 || col3 || col4) col +FROM ( + SELECT + encode(string(id), 'utf-8') col1, + encode(string(id + 1), 'utf-8') col2, + encode(string(id + 2), 'utf-8') col3, + encode(string(id + 3), 'utf-8') col4 + FROM range(10) +) +-- !query 9 schema +struct +-- !query 9 output +0123 +1234 +2345 +3456 +4567 +5678 +6789 +78910 +891011 +9101112 + + +-- !query 10 +SELECT ((col1 || col2) || (col3 || col4)) col +FROM ( + SELECT + encode(string(id), 'utf-8') col1, + encode(string(id + 1), 'utf-8') col2, + encode(string(id + 2), 'utf-8') col3, + encode(string(id + 3), 'utf-8') col4 + FROM range(10) +) +-- !query 10 schema +struct +-- !query 10 output +0123 +1234 +2345 +3456 +4567 +5678 +6789 +78910 +891011 +9101112 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/dateTimeOperations.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/dateTimeOperations.sql.out new file mode 100644 index 0000000000000..12c1d1617679f --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/dateTimeOperations.sql.out @@ -0,0 +1,349 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 40 + + +-- !query 0 +CREATE TEMPORARY VIEW t AS SELECT 1 +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +select cast(1 as tinyint) + interval 2 day +-- !query 1 schema +struct<> +-- !query 1 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS TINYINT) + interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS TINYINT) + interval 2 days)' (tinyint and calendarinterval).; line 1 pos 7 + + +-- !query 2 +select cast(1 as smallint) + interval 2 day +-- !query 2 schema +struct<> +-- !query 2 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS SMALLINT) + interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS SMALLINT) + interval 2 days)' (smallint and calendarinterval).; line 1 pos 7 + + +-- !query 3 +select cast(1 as int) + interval 2 day +-- !query 3 schema +struct<> +-- !query 3 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS INT) + interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS INT) + interval 2 days)' (int and calendarinterval).; line 1 pos 7 + + +-- !query 4 +select cast(1 as bigint) + interval 2 day +-- !query 4 schema +struct<> +-- !query 4 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BIGINT) + interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS BIGINT) + interval 2 days)' (bigint and calendarinterval).; line 1 pos 7 + + +-- !query 5 +select cast(1 as float) + interval 2 day +-- !query 5 schema +struct<> +-- !query 5 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS FLOAT) + interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS FLOAT) + interval 2 days)' (float and calendarinterval).; line 1 pos 7 + + 
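Queries 1 through 5 above all fail for the same reason, and queries 6 through 13 below complete the pattern these results encode: a calendar interval combines only with string, timestamp, and date operands (a string operand is first coerced to timestamp, so the result renders as a timestamp), while numeric, binary, and boolean operands raise an AnalysisException instead of being coerced. A minimal spark-sql sketch of the same behavior, assuming the default settings this suite runs under:

  -- accepted: date, timestamp, and string operands
  select cast('2017-12-11 09:30:00' as date) + interval 2 day;   -- 2017-12-13
  select cast('2017-12-11' as string) + interval 2 day;          -- 2017-12-13 00:00:00
  -- rejected: numeric operands are not coerced to a common type
  select cast(1 as int) + interval 2 day;                        -- AnalysisException (int and calendarinterval)

The rule is symmetric in operand order (queries 14 through 26 put the interval on the left) and applies unchanged to subtraction (queries 27 through 39).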
+-- !query 6 +select cast(1 as double) + interval 2 day +-- !query 6 schema +struct<> +-- !query 6 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DOUBLE) + interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS DOUBLE) + interval 2 days)' (double and calendarinterval).; line 1 pos 7 + + +-- !query 7 +select cast(1 as decimal(10, 0)) + interval 2 day +-- !query 7 schema +struct<> +-- !query 7 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) + interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) + interval 2 days)' (decimal(10,0) and calendarinterval).; line 1 pos 7 + + +-- !query 8 +select cast('2017-12-11' as string) + interval 2 day +-- !query 8 schema +struct +-- !query 8 output +2017-12-13 00:00:00 + + +-- !query 9 +select cast('2017-12-11 09:30:00' as string) + interval 2 day +-- !query 9 schema +struct +-- !query 9 output +2017-12-13 09:30:00 + + +-- !query 10 +select cast('1' as binary) + interval 2 day +-- !query 10 schema +struct<> +-- !query 10 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) + interval 2 days)' due to data type mismatch: differing types in '(CAST('1' AS BINARY) + interval 2 days)' (binary and calendarinterval).; line 1 pos 7 + + +-- !query 11 +select cast(1 as boolean) + interval 2 day +-- !query 11 schema +struct<> +-- !query 11 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BOOLEAN) + interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) + interval 2 days)' (boolean and calendarinterval).; line 1 pos 7 + + +-- !query 12 +select cast('2017-12-11 09:30:00.0' as timestamp) + interval 2 day +-- !query 12 schema +struct +-- !query 12 output +2017-12-13 09:30:00 + + +-- !query 13 +select cast('2017-12-11 09:30:00' as date) + interval 2 day +-- !query 13 schema +struct +-- !query 13 output +2017-12-13 + + +-- !query 14 +select interval 2 day + cast(1 as tinyint) +-- !query 14 schema +struct<> +-- !query 14 output +org.apache.spark.sql.AnalysisException +cannot resolve '(interval 2 days + CAST(1 AS TINYINT))' due to data type mismatch: differing types in '(interval 2 days + CAST(1 AS TINYINT))' (calendarinterval and tinyint).; line 1 pos 7 + + +-- !query 15 +select interval 2 day + cast(1 as smallint) +-- !query 15 schema +struct<> +-- !query 15 output +org.apache.spark.sql.AnalysisException +cannot resolve '(interval 2 days + CAST(1 AS SMALLINT))' due to data type mismatch: differing types in '(interval 2 days + CAST(1 AS SMALLINT))' (calendarinterval and smallint).; line 1 pos 7 + + +-- !query 16 +select interval 2 day + cast(1 as int) +-- !query 16 schema +struct<> +-- !query 16 output +org.apache.spark.sql.AnalysisException +cannot resolve '(interval 2 days + CAST(1 AS INT))' due to data type mismatch: differing types in '(interval 2 days + CAST(1 AS INT))' (calendarinterval and int).; line 1 pos 7 + + +-- !query 17 +select interval 2 day + cast(1 as bigint) +-- !query 17 schema +struct<> +-- !query 17 output +org.apache.spark.sql.AnalysisException +cannot resolve '(interval 2 days + CAST(1 AS BIGINT))' due to data type mismatch: differing types in '(interval 2 days + CAST(1 AS BIGINT))' (calendarinterval and bigint).; line 1 pos 7 + + +-- !query 18 +select interval 2 day + cast(1 as float) +-- !query 18 schema +struct<> +-- !query 18 output +org.apache.spark.sql.AnalysisException +cannot resolve '(interval 2 days + CAST(1 AS FLOAT))' due to data 
type mismatch: differing types in '(interval 2 days + CAST(1 AS FLOAT))' (calendarinterval and float).; line 1 pos 7 + + +-- !query 19 +select interval 2 day + cast(1 as double) +-- !query 19 schema +struct<> +-- !query 19 output +org.apache.spark.sql.AnalysisException +cannot resolve '(interval 2 days + CAST(1 AS DOUBLE))' due to data type mismatch: differing types in '(interval 2 days + CAST(1 AS DOUBLE))' (calendarinterval and double).; line 1 pos 7 + + +-- !query 20 +select interval 2 day + cast(1 as decimal(10, 0)) +-- !query 20 schema +struct<> +-- !query 20 output +org.apache.spark.sql.AnalysisException +cannot resolve '(interval 2 days + CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(interval 2 days + CAST(1 AS DECIMAL(10,0)))' (calendarinterval and decimal(10,0)).; line 1 pos 7 + + +-- !query 21 +select interval 2 day + cast('2017-12-11' as string) +-- !query 21 schema +struct +-- !query 21 output +2017-12-13 00:00:00 + + +-- !query 22 +select interval 2 day + cast('2017-12-11 09:30:00' as string) +-- !query 22 schema +struct +-- !query 22 output +2017-12-13 09:30:00 + + +-- !query 23 +select interval 2 day + cast('1' as binary) +-- !query 23 schema +struct<> +-- !query 23 output +org.apache.spark.sql.AnalysisException +cannot resolve '(interval 2 days + CAST('1' AS BINARY))' due to data type mismatch: differing types in '(interval 2 days + CAST('1' AS BINARY))' (calendarinterval and binary).; line 1 pos 7 + + +-- !query 24 +select interval 2 day + cast(1 as boolean) +-- !query 24 schema +struct<> +-- !query 24 output +org.apache.spark.sql.AnalysisException +cannot resolve '(interval 2 days + CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(interval 2 days + CAST(1 AS BOOLEAN))' (calendarinterval and boolean).; line 1 pos 7 + + +-- !query 25 +select interval 2 day + cast('2017-12-11 09:30:00.0' as timestamp) +-- !query 25 schema +struct +-- !query 25 output +2017-12-13 09:30:00 + + +-- !query 26 +select interval 2 day + cast('2017-12-11 09:30:00' as date) +-- !query 26 schema +struct +-- !query 26 output +2017-12-13 + + +-- !query 27 +select cast(1 as tinyint) - interval 2 day +-- !query 27 schema +struct<> +-- !query 27 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS TINYINT) - interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS TINYINT) - interval 2 days)' (tinyint and calendarinterval).; line 1 pos 7 + + +-- !query 28 +select cast(1 as smallint) - interval 2 day +-- !query 28 schema +struct<> +-- !query 28 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS SMALLINT) - interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS SMALLINT) - interval 2 days)' (smallint and calendarinterval).; line 1 pos 7 + + +-- !query 29 +select cast(1 as int) - interval 2 day +-- !query 29 schema +struct<> +-- !query 29 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS INT) - interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS INT) - interval 2 days)' (int and calendarinterval).; line 1 pos 7 + + +-- !query 30 +select cast(1 as bigint) - interval 2 day +-- !query 30 schema +struct<> +-- !query 30 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BIGINT) - interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS BIGINT) - interval 2 days)' (bigint and calendarinterval).; line 1 pos 7 + + +-- !query 31 +select cast(1 as float) - interval 2 day +-- !query 31 schema 
+struct<> +-- !query 31 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS FLOAT) - interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS FLOAT) - interval 2 days)' (float and calendarinterval).; line 1 pos 7 + + +-- !query 32 +select cast(1 as double) - interval 2 day +-- !query 32 schema +struct<> +-- !query 32 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DOUBLE) - interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS DOUBLE) - interval 2 days)' (double and calendarinterval).; line 1 pos 7 + + +-- !query 33 +select cast(1 as decimal(10, 0)) - interval 2 day +-- !query 33 schema +struct<> +-- !query 33 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) - interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) - interval 2 days)' (decimal(10,0) and calendarinterval).; line 1 pos 7 + + +-- !query 34 +select cast('2017-12-11' as string) - interval 2 day +-- !query 34 schema +struct +-- !query 34 output +2017-12-09 00:00:00 + + +-- !query 35 +select cast('2017-12-11 09:30:00' as string) - interval 2 day +-- !query 35 schema +struct +-- !query 35 output +2017-12-09 09:30:00 + + +-- !query 36 +select cast('1' as binary) - interval 2 day +-- !query 36 schema +struct<> +-- !query 36 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) - interval 2 days)' due to data type mismatch: differing types in '(CAST('1' AS BINARY) - interval 2 days)' (binary and calendarinterval).; line 1 pos 7 + + +-- !query 37 +select cast(1 as boolean) - interval 2 day +-- !query 37 schema +struct<> +-- !query 37 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BOOLEAN) - interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) - interval 2 days)' (boolean and calendarinterval).; line 1 pos 7 + + +-- !query 38 +select cast('2017-12-11 09:30:00.0' as timestamp) - interval 2 day +-- !query 38 schema +struct +-- !query 38 output +2017-12-09 09:30:00 + + +-- !query 39 +select cast('2017-12-11 09:30:00' as date) - interval 2 day +-- !query 39 schema +struct +-- !query 39 output +2017-12-09 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/decimalArithmeticOperations.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/decimalArithmeticOperations.sql.out new file mode 100644 index 0000000000000..ce02f6adc456c --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/decimalArithmeticOperations.sql.out @@ -0,0 +1,82 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 10 + + +-- !query 0 +CREATE TEMPORARY VIEW t AS SELECT 1.0 as a, 0.0 as b +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +select a / b from t +-- !query 1 schema +struct<(CAST(a AS DECIMAL(2,1)) / CAST(b AS DECIMAL(2,1))):decimal(8,6)> +-- !query 1 output +NULL + + +-- !query 2 +select a % b from t +-- !query 2 schema +struct<(CAST(a AS DECIMAL(2,1)) % CAST(b AS DECIMAL(2,1))):decimal(1,1)> +-- !query 2 output +NULL + + +-- !query 3 +select pmod(a, b) from t +-- !query 3 schema +struct +-- !query 3 output +NULL + + +-- !query 4 +select (5e36 + 0.1) + 5e36 +-- !query 4 schema +struct<(CAST((CAST(5E+36 AS DECIMAL(38,1)) + CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) + CAST(5E+36 AS DECIMAL(38,1))):decimal(38,1)> +-- !query 4 output +NULL + + +-- !query 5 +select (-4e36 - 0.1) - 7e36 +-- 
!query 5 schema +struct<(CAST((CAST(-4E+36 AS DECIMAL(38,1)) - CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) - CAST(7E+36 AS DECIMAL(38,1))):decimal(38,1)> +-- !query 5 output +NULL + + +-- !query 6 +select 12345678901234567890.0 * 12345678901234567890.0 +-- !query 6 schema +struct<(12345678901234567890.0 * 12345678901234567890.0):decimal(38,2)> +-- !query 6 output +NULL + + +-- !query 7 +select 1e35 / 0.1 +-- !query 7 schema +struct<(CAST(1E+35 AS DECIMAL(37,1)) / CAST(0.1 AS DECIMAL(37,1))):decimal(38,3)> +-- !query 7 output +NULL + + +-- !query 8 +select 123456789123456789.1234567890 * 1.123456789123456789 +-- !query 8 schema +struct<(CAST(123456789123456789.1234567890 AS DECIMAL(36,18)) * CAST(1.123456789123456789 AS DECIMAL(36,18))):decimal(38,28)> +-- !query 8 output +NULL + + +-- !query 9 +select 0.001 / 9876543210987654321098765432109876543.2 +-- !query 9 schema +struct<(CAST(0.001 AS DECIMAL(38,3)) / CAST(9876543210987654321098765432109876543.2 AS DECIMAL(38,3))):decimal(38,37)> +-- !query 9 output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/decimalPrecision.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/decimalPrecision.sql.out new file mode 100644 index 0000000000000..ebc8201ed5a1d --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/decimalPrecision.sql.out @@ -0,0 +1,9514 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 1145 + + +-- !query 0 +CREATE TEMPORARY VIEW t AS SELECT 1 +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +SELECT cast(1 as tinyint) + cast(1 as decimal(3, 0)) FROM t +-- !query 1 schema +struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) + CAST(1 AS DECIMAL(3,0))):decimal(4,0)> +-- !query 1 output +2 + + +-- !query 2 +SELECT cast(1 as tinyint) + cast(1 as decimal(5, 0)) FROM t +-- !query 2 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(6,0)) + CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(6,0))):decimal(6,0)> +-- !query 2 output +2 + + +-- !query 3 +SELECT cast(1 as tinyint) + cast(1 as decimal(10, 0)) FROM t +-- !query 3 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(11,0)) + CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 3 output +2 + + +-- !query 4 +SELECT cast(1 as tinyint) + cast(1 as decimal(20, 0)) FROM t +-- !query 4 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 4 output +2 + + +-- !query 5 +SELECT cast(1 as smallint) + cast(1 as decimal(3, 0)) FROM t +-- !query 5 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(6,0)) + CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(6,0))):decimal(6,0)> +-- !query 5 output +2 + + +-- !query 6 +SELECT cast(1 as smallint) + cast(1 as decimal(5, 0)) FROM t +-- !query 6 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) + CAST(1 AS DECIMAL(5,0))):decimal(6,0)> +-- !query 6 output +2 + + +-- !query 7 +SELECT cast(1 as smallint) + cast(1 as decimal(10, 0)) FROM t +-- !query 7 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(11,0)) + CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 7 output +2 + + +-- !query 8 +SELECT cast(1 as smallint) + cast(1 as decimal(20, 0)) FROM t +-- !query 8 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(20,0)) AS 
DECIMAL(21,0))):decimal(21,0)> +-- !query 8 output +2 + + +-- !query 9 +SELECT cast(1 as int) + cast(1 as decimal(3, 0)) FROM t +-- !query 9 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(11,0)) + CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 9 output +2 + + +-- !query 10 +SELECT cast(1 as int) + cast(1 as decimal(5, 0)) FROM t +-- !query 10 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(11,0)) + CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 10 output +2 + + +-- !query 11 +SELECT cast(1 as int) + cast(1 as decimal(10, 0)) FROM t +-- !query 11 schema +struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) + CAST(1 AS DECIMAL(10,0))):decimal(11,0)> +-- !query 11 output +2 + + +-- !query 12 +SELECT cast(1 as int) + cast(1 as decimal(20, 0)) FROM t +-- !query 12 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 12 output +2 + + +-- !query 13 +SELECT cast(1 as bigint) + cast(1 as decimal(3, 0)) FROM t +-- !query 13 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 13 output +2 + + +-- !query 14 +SELECT cast(1 as bigint) + cast(1 as decimal(5, 0)) FROM t +-- !query 14 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 14 output +2 + + +-- !query 15 +SELECT cast(1 as bigint) + cast(1 as decimal(10, 0)) FROM t +-- !query 15 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 15 output +2 + + +-- !query 16 +SELECT cast(1 as bigint) + cast(1 as decimal(20, 0)) FROM t +-- !query 16 schema +struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) + CAST(1 AS DECIMAL(20,0))):decimal(21,0)> +-- !query 16 output +2 + + +-- !query 17 +SELECT cast(1 as float) + cast(1 as decimal(3, 0)) FROM t +-- !query 17 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) + CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):double> +-- !query 17 output +2.0 + + +-- !query 18 +SELECT cast(1 as float) + cast(1 as decimal(5, 0)) FROM t +-- !query 18 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) + CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):double> +-- !query 18 output +2.0 + + +-- !query 19 +SELECT cast(1 as float) + cast(1 as decimal(10, 0)) FROM t +-- !query 19 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) + CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> +-- !query 19 output +2.0 + + +-- !query 20 +SELECT cast(1 as float) + cast(1 as decimal(20, 0)) FROM t +-- !query 20 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) + CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):double> +-- !query 20 output +2.0 + + +-- !query 21 +SELECT cast(1 as double) + cast(1 as decimal(3, 0)) FROM t +-- !query 21 schema +struct<(CAST(1 AS DOUBLE) + CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):double> +-- !query 21 output +2.0 + + +-- !query 22 +SELECT cast(1 as double) + cast(1 as decimal(5, 0)) FROM t +-- !query 22 schema +struct<(CAST(1 AS DOUBLE) + CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):double> +-- !query 22 output +2.0 + + +-- !query 23 +SELECT cast(1 as double) + cast(1 as decimal(10, 0)) FROM t +-- !query 23 schema +struct<(CAST(1 AS DOUBLE) + CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> +-- !query 23 output +2.0 + + +-- !query 24 +SELECT cast(1 as 
double) + cast(1 as decimal(20, 0)) FROM t +-- !query 24 schema +struct<(CAST(1 AS DOUBLE) + CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):double> +-- !query 24 output +2.0 + + +-- !query 25 +SELECT cast(1 as decimal(10, 0)) + cast(1 as decimal(3, 0)) FROM t +-- !query 25 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0)) + CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 25 output +2 + + +-- !query 26 +SELECT cast(1 as decimal(10, 0)) + cast(1 as decimal(5, 0)) FROM t +-- !query 26 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0)) + CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 26 output +2 + + +-- !query 27 +SELECT cast(1 as decimal(10, 0)) + cast(1 as decimal(10, 0)) FROM t +-- !query 27 schema +struct<(CAST(1 AS DECIMAL(10,0)) + CAST(1 AS DECIMAL(10,0))):decimal(11,0)> +-- !query 27 output +2 + + +-- !query 28 +SELECT cast(1 as decimal(10, 0)) + cast(1 as decimal(20, 0)) FROM t +-- !query 28 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 28 output +2 + + +-- !query 29 +SELECT cast('1' as binary) + cast(1 as decimal(3, 0)) FROM t +-- !query 29 schema +struct<> +-- !query 29 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) + CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) + CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 + + +-- !query 30 +SELECT cast('1' as binary) + cast(1 as decimal(5, 0)) FROM t +-- !query 30 schema +struct<> +-- !query 30 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) + CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) + CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 + + +-- !query 31 +SELECT cast('1' as binary) + cast(1 as decimal(10, 0)) FROM t +-- !query 31 schema +struct<> +-- !query 31 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) + CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) + CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 + + +-- !query 32 +SELECT cast('1' as binary) + cast(1 as decimal(20, 0)) FROM t +-- !query 32 schema +struct<> +-- !query 32 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) + CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) + CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 + + +-- !query 33 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) + cast(1 as decimal(3, 0)) FROM t +-- !query 33 schema +struct<> +-- !query 33 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) + CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) + CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 + + +-- !query 34 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) + cast(1 as decimal(5, 0)) FROM t +-- !query 34 schema +struct<> +-- !query 34 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) + CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) + CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 + + +-- 
!query 35 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) + cast(1 as decimal(10, 0)) FROM t +-- !query 35 schema +struct<> +-- !query 35 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) + CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) + CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 + + +-- !query 36 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) + cast(1 as decimal(20, 0)) FROM t +-- !query 36 schema +struct<> +-- !query 36 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) + CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) + CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 + + +-- !query 37 +SELECT cast('2017-12-11 09:30:00' as date) + cast(1 as decimal(3, 0)) FROM t +-- !query 37 schema +struct<> +-- !query 37 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) + CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) + CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 + + +-- !query 38 +SELECT cast('2017-12-11 09:30:00' as date) + cast(1 as decimal(5, 0)) FROM t +-- !query 38 schema +struct<> +-- !query 38 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) + CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) + CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 + + +-- !query 39 +SELECT cast('2017-12-11 09:30:00' as date) + cast(1 as decimal(10, 0)) FROM t +-- !query 39 schema +struct<> +-- !query 39 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) + CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) + CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 + + +-- !query 40 +SELECT cast('2017-12-11 09:30:00' as date) + cast(1 as decimal(20, 0)) FROM t +-- !query 40 schema +struct<> +-- !query 40 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) + CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) + CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 + + +-- !query 41 +SELECT cast(1 as decimal(3, 0)) + cast(1 as tinyint) FROM t +-- !query 41 schema +struct<(CAST(1 AS DECIMAL(3,0)) + CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):decimal(4,0)> +-- !query 41 output +2 + + +-- !query 42 +SELECT cast(1 as decimal(5, 0)) + cast(1 as tinyint) FROM t +-- !query 42 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(6,0)) + CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(6,0))):decimal(6,0)> +-- !query 42 output +2 + + +-- !query 43 +SELECT cast(1 as decimal(10, 0)) + cast(1 as tinyint) FROM t +-- !query 43 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0)) + CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 43 output +2 + + +-- !query 44 +SELECT cast(1 as decimal(20, 0)) + cast(1 as tinyint) FROM t +-- !query 44 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0)) + CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS 
DECIMAL(21,0))):decimal(21,0)> +-- !query 44 output +2 + + +-- !query 45 +SELECT cast(1 as decimal(3, 0)) + cast(1 as smallint) FROM t +-- !query 45 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(6,0)) + CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(6,0))):decimal(6,0)> +-- !query 45 output +2 + + +-- !query 46 +SELECT cast(1 as decimal(5, 0)) + cast(1 as smallint) FROM t +-- !query 46 schema +struct<(CAST(1 AS DECIMAL(5,0)) + CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):decimal(6,0)> +-- !query 46 output +2 + + +-- !query 47 +SELECT cast(1 as decimal(10, 0)) + cast(1 as smallint) FROM t +-- !query 47 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0)) + CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 47 output +2 + + +-- !query 48 +SELECT cast(1 as decimal(20, 0)) + cast(1 as smallint) FROM t +-- !query 48 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0)) + CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 48 output +2 + + +-- !query 49 +SELECT cast(1 as decimal(3, 0)) + cast(1 as int) FROM t +-- !query 49 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(11,0)) + CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 49 output +2 + + +-- !query 50 +SELECT cast(1 as decimal(5, 0)) + cast(1 as int) FROM t +-- !query 50 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(11,0)) + CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 50 output +2 + + +-- !query 51 +SELECT cast(1 as decimal(10, 0)) + cast(1 as int) FROM t +-- !query 51 schema +struct<(CAST(1 AS DECIMAL(10,0)) + CAST(CAST(1 AS INT) AS DECIMAL(10,0))):decimal(11,0)> +-- !query 51 output +2 + + +-- !query 52 +SELECT cast(1 as decimal(20, 0)) + cast(1 as int) FROM t +-- !query 52 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0)) + CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 52 output +2 + + +-- !query 53 +SELECT cast(1 as decimal(3, 0)) + cast(1 as bigint) FROM t +-- !query 53 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(21,0)) + CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 53 output +2 + + +-- !query 54 +SELECT cast(1 as decimal(5, 0)) + cast(1 as bigint) FROM t +-- !query 54 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(21,0)) + CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 54 output +2 + + +-- !query 55 +SELECT cast(1 as decimal(10, 0)) + cast(1 as bigint) FROM t +-- !query 55 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(21,0)) + CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 55 output +2 + + +-- !query 56 +SELECT cast(1 as decimal(20, 0)) + cast(1 as bigint) FROM t +-- !query 56 schema +struct<(CAST(1 AS DECIMAL(20,0)) + CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):decimal(21,0)> +-- !query 56 output +2 + + +-- !query 57 +SELECT cast(1 as decimal(3, 0)) + cast(1 as float) FROM t +-- !query 57 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) + CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 57 output +2.0 + + +-- !query 58 +SELECT cast(1 as decimal(5, 0)) + cast(1 as float) FROM t +-- !query 58 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) + CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 58 output +2.0 + + +-- !query 59 +SELECT cast(1 as decimal(10, 0)) + cast(1 as float) FROM 
t +-- !query 59 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) + CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 59 output +2.0 + + +-- !query 60 +SELECT cast(1 as decimal(20, 0)) + cast(1 as float) FROM t +-- !query 60 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) + CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 60 output +2.0 + + +-- !query 61 +SELECT cast(1 as decimal(3, 0)) + cast(1 as double) FROM t +-- !query 61 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) + CAST(1 AS DOUBLE)):double> +-- !query 61 output +2.0 + + +-- !query 62 +SELECT cast(1 as decimal(5, 0)) + cast(1 as double) FROM t +-- !query 62 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) + CAST(1 AS DOUBLE)):double> +-- !query 62 output +2.0 + + +-- !query 63 +SELECT cast(1 as decimal(10, 0)) + cast(1 as double) FROM t +-- !query 63 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) + CAST(1 AS DOUBLE)):double> +-- !query 63 output +2.0 + + +-- !query 64 +SELECT cast(1 as decimal(20, 0)) + cast(1 as double) FROM t +-- !query 64 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) + CAST(1 AS DOUBLE)):double> +-- !query 64 output +2.0 + + +-- !query 65 +SELECT cast(1 as decimal(3, 0)) + cast(1 as decimal(10, 0)) FROM t +-- !query 65 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(11,0)) + CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 65 output +2 + + +-- !query 66 +SELECT cast(1 as decimal(5, 0)) + cast(1 as decimal(10, 0)) FROM t +-- !query 66 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(11,0)) + CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 66 output +2 + + +-- !query 67 +SELECT cast(1 as decimal(10, 0)) + cast(1 as decimal(10, 0)) FROM t +-- !query 67 schema +struct<(CAST(1 AS DECIMAL(10,0)) + CAST(1 AS DECIMAL(10,0))):decimal(11,0)> +-- !query 67 output +2 + + +-- !query 68 +SELECT cast(1 as decimal(20, 0)) + cast(1 as decimal(10, 0)) FROM t +-- !query 68 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 68 output +2 + + +-- !query 69 +SELECT cast(1 as decimal(3, 0)) + cast(1 as string) FROM t +-- !query 69 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) + CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 69 output +2.0 + + +-- !query 70 +SELECT cast(1 as decimal(5, 0)) + cast(1 as string) FROM t +-- !query 70 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) + CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 70 output +2.0 + + +-- !query 71 +SELECT cast(1 as decimal(10, 0)) + cast(1 as string) FROM t +-- !query 71 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) + CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 71 output +2.0 + + +-- !query 72 +SELECT cast(1 as decimal(20, 0)) + cast(1 as string) FROM t +-- !query 72 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) + CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 72 output +2.0 + + +-- !query 73 +SELECT cast(1 as decimal(3, 0)) + cast('1' as binary) FROM t +-- !query 73 schema +struct<> +-- !query 73 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) + CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) + CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 + + +-- !query 74 +SELECT cast(1 as decimal(5, 0)) + cast('1' as binary) FROM t +-- !query 74 schema +struct<> +-- !query 74 output 
+org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) + CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) + CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 + + +-- !query 75 +SELECT cast(1 as decimal(10, 0)) + cast('1' as binary) FROM t +-- !query 75 schema +struct<> +-- !query 75 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) + CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) + CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 + + +-- !query 76 +SELECT cast(1 as decimal(20, 0)) + cast('1' as binary) FROM t +-- !query 76 schema +struct<> +-- !query 76 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) + CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) + CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 + + +-- !query 77 +SELECT cast(1 as decimal(3, 0)) + cast(1 as boolean) FROM t +-- !query 77 schema +struct<> +-- !query 77 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) + CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) + CAST(1 AS BOOLEAN))' (decimal(3,0) and boolean).; line 1 pos 7 + + +-- !query 78 +SELECT cast(1 as decimal(5, 0)) + cast(1 as boolean) FROM t +-- !query 78 schema +struct<> +-- !query 78 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) + CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) + CAST(1 AS BOOLEAN))' (decimal(5,0) and boolean).; line 1 pos 7 + + +-- !query 79 +SELECT cast(1 as decimal(10, 0)) + cast(1 as boolean) FROM t +-- !query 79 schema +struct<> +-- !query 79 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) + CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) + CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 + + +-- !query 80 +SELECT cast(1 as decimal(20, 0)) + cast(1 as boolean) FROM t +-- !query 80 schema +struct<> +-- !query 80 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) + CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) + CAST(1 AS BOOLEAN))' (decimal(20,0) and boolean).; line 1 pos 7 + + +-- !query 81 +SELECT cast(1 as decimal(3, 0)) + cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 81 schema +struct<> +-- !query 81 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) + CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) + CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 + + +-- !query 82 +SELECT cast(1 as decimal(5, 0)) + cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 82 schema +struct<> +-- !query 82 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) + CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) + CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 + + +-- !query 83 +SELECT cast(1 as decimal(10, 0)) + cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 83 schema +struct<> +-- !query 83 output 
+org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) + CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) + CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 + + +-- !query 84 +SELECT cast(1 as decimal(20, 0)) + cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 84 schema +struct<> +-- !query 84 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) + CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) + CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 + + +-- !query 85 +SELECT cast(1 as decimal(3, 0)) + cast('2017-12-11 09:30:00' as date) FROM t +-- !query 85 schema +struct<> +-- !query 85 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) + CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) + CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 + + +-- !query 86 +SELECT cast(1 as decimal(5, 0)) + cast('2017-12-11 09:30:00' as date) FROM t +-- !query 86 schema +struct<> +-- !query 86 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) + CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) + CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 + + +-- !query 87 +SELECT cast(1 as decimal(10, 0)) + cast('2017-12-11 09:30:00' as date) FROM t +-- !query 87 schema +struct<> +-- !query 87 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) + CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) + CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 + + +-- !query 88 +SELECT cast(1 as decimal(20, 0)) + cast('2017-12-11 09:30:00' as date) FROM t +-- !query 88 schema +struct<> +-- !query 88 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) + CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) + CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 + + +-- !query 89 +SELECT cast(1 as tinyint) - cast(1 as decimal(3, 0)) FROM t +-- !query 89 schema +struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) - CAST(1 AS DECIMAL(3,0))):decimal(4,0)> +-- !query 89 output +0 + + +-- !query 90 +SELECT cast(1 as tinyint) - cast(1 as decimal(5, 0)) FROM t +-- !query 90 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(6,0)) - CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(6,0))):decimal(6,0)> +-- !query 90 output +0 + + +-- !query 91 +SELECT cast(1 as tinyint) - cast(1 as decimal(10, 0)) FROM t +-- !query 91 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(11,0)) - CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 91 output +0 + + +-- !query 92 +SELECT cast(1 as tinyint) - cast(1 as decimal(20, 0)) FROM t +-- !query 92 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(21,0)) - CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 92 output +0 + + +-- !query 93 +SELECT cast(1 as smallint) - cast(1 as decimal(3, 0)) FROM t +-- !query 93 schema 
+struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(6,0)) - CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(6,0))):decimal(6,0)> +-- !query 93 output +0 + + +-- !query 94 +SELECT cast(1 as smallint) - cast(1 as decimal(5, 0)) FROM t +-- !query 94 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) - CAST(1 AS DECIMAL(5,0))):decimal(6,0)> +-- !query 94 output +0 + + +-- !query 95 +SELECT cast(1 as smallint) - cast(1 as decimal(10, 0)) FROM t +-- !query 95 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(11,0)) - CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 95 output +0 + + +-- !query 96 +SELECT cast(1 as smallint) - cast(1 as decimal(20, 0)) FROM t +-- !query 96 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(21,0)) - CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 96 output +0 + + +-- !query 97 +SELECT cast(1 as int) - cast(1 as decimal(3, 0)) FROM t +-- !query 97 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(11,0)) - CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 97 output +0 + + +-- !query 98 +SELECT cast(1 as int) - cast(1 as decimal(5, 0)) FROM t +-- !query 98 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(11,0)) - CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 98 output +0 + + +-- !query 99 +SELECT cast(1 as int) - cast(1 as decimal(10, 0)) FROM t +-- !query 99 schema +struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) - CAST(1 AS DECIMAL(10,0))):decimal(11,0)> +-- !query 99 output +0 + + +-- !query 100 +SELECT cast(1 as int) - cast(1 as decimal(20, 0)) FROM t +-- !query 100 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(21,0)) - CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 100 output +0 + + +-- !query 101 +SELECT cast(1 as bigint) - cast(1 as decimal(3, 0)) FROM t +-- !query 101 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0)) - CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 101 output +0 + + +-- !query 102 +SELECT cast(1 as bigint) - cast(1 as decimal(5, 0)) FROM t +-- !query 102 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0)) - CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 102 output +0 + + +-- !query 103 +SELECT cast(1 as bigint) - cast(1 as decimal(10, 0)) FROM t +-- !query 103 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0)) - CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 103 output +0 + + +-- !query 104 +SELECT cast(1 as bigint) - cast(1 as decimal(20, 0)) FROM t +-- !query 104 schema +struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) - CAST(1 AS DECIMAL(20,0))):decimal(21,0)> +-- !query 104 output +0 + + +-- !query 105 +SELECT cast(1 as float) - cast(1 as decimal(3, 0)) FROM t +-- !query 105 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) - CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):double> +-- !query 105 output +0.0 + + +-- !query 106 +SELECT cast(1 as float) - cast(1 as decimal(5, 0)) FROM t +-- !query 106 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) - CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):double> +-- !query 106 output +0.0 + + +-- !query 107 +SELECT cast(1 as float) - cast(1 as decimal(10, 0)) FROM t +-- !query 107 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) - CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> +-- !query 107 
output +0.0 + + +-- !query 108 +SELECT cast(1 as float) - cast(1 as decimal(20, 0)) FROM t +-- !query 108 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) - CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):double> +-- !query 108 output +0.0 + + +-- !query 109 +SELECT cast(1 as double) - cast(1 as decimal(3, 0)) FROM t +-- !query 109 schema +struct<(CAST(1 AS DOUBLE) - CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):double> +-- !query 109 output +0.0 + + +-- !query 110 +SELECT cast(1 as double) - cast(1 as decimal(5, 0)) FROM t +-- !query 110 schema +struct<(CAST(1 AS DOUBLE) - CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):double> +-- !query 110 output +0.0 + + +-- !query 111 +SELECT cast(1 as double) - cast(1 as decimal(10, 0)) FROM t +-- !query 111 schema +struct<(CAST(1 AS DOUBLE) - CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> +-- !query 111 output +0.0 + + +-- !query 112 +SELECT cast(1 as double) - cast(1 as decimal(20, 0)) FROM t +-- !query 112 schema +struct<(CAST(1 AS DOUBLE) - CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):double> +-- !query 112 output +0.0 + + +-- !query 113 +SELECT cast(1 as decimal(10, 0)) - cast(1 as decimal(3, 0)) FROM t +-- !query 113 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0)) - CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 113 output +0 + + +-- !query 114 +SELECT cast(1 as decimal(10, 0)) - cast(1 as decimal(5, 0)) FROM t +-- !query 114 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0)) - CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 114 output +0 + + +-- !query 115 +SELECT cast(1 as decimal(10, 0)) - cast(1 as decimal(10, 0)) FROM t +-- !query 115 schema +struct<(CAST(1 AS DECIMAL(10,0)) - CAST(1 AS DECIMAL(10,0))):decimal(11,0)> +-- !query 115 output +0 + + +-- !query 116 +SELECT cast(1 as decimal(10, 0)) - cast(1 as decimal(20, 0)) FROM t +-- !query 116 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(21,0)) - CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 116 output +0 + + +-- !query 117 +SELECT cast('1' as binary) - cast(1 as decimal(3, 0)) FROM t +-- !query 117 schema +struct<> +-- !query 117 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) - CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) - CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 + + +-- !query 118 +SELECT cast('1' as binary) - cast(1 as decimal(5, 0)) FROM t +-- !query 118 schema +struct<> +-- !query 118 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) - CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) - CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 + + +-- !query 119 +SELECT cast('1' as binary) - cast(1 as decimal(10, 0)) FROM t +-- !query 119 schema +struct<> +-- !query 119 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) - CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) - CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 + + +-- !query 120 +SELECT cast('1' as binary) - cast(1 as decimal(20, 0)) FROM t +-- !query 120 schema +struct<> +-- !query 120 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) - CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) - CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; 
line 1 pos 7 + + +-- !query 121 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) - cast(1 as decimal(3, 0)) FROM t +-- !query 121 schema +struct<> +-- !query 121 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) - CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) - CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 + + +-- !query 122 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) - cast(1 as decimal(5, 0)) FROM t +-- !query 122 schema +struct<> +-- !query 122 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) - CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) - CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 + + +-- !query 123 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) - cast(1 as decimal(10, 0)) FROM t +-- !query 123 schema +struct<> +-- !query 123 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) - CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) - CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 + + +-- !query 124 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) - cast(1 as decimal(20, 0)) FROM t +-- !query 124 schema +struct<> +-- !query 124 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) - CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) - CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 + + +-- !query 125 +SELECT cast('2017-12-11 09:30:00' as date) - cast(1 as decimal(3, 0)) FROM t +-- !query 125 schema +struct<> +-- !query 125 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) - CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) - CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 + + +-- !query 126 +SELECT cast('2017-12-11 09:30:00' as date) - cast(1 as decimal(5, 0)) FROM t +-- !query 126 schema +struct<> +-- !query 126 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) - CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) - CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 + + +-- !query 127 +SELECT cast('2017-12-11 09:30:00' as date) - cast(1 as decimal(10, 0)) FROM t +-- !query 127 schema +struct<> +-- !query 127 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) - CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) - CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 + + +-- !query 128 +SELECT cast('2017-12-11 09:30:00' as date) - cast(1 as decimal(20, 0)) FROM t +-- !query 128 schema +struct<> +-- !query 128 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) - CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) - CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 + + +-- !query 129 +SELECT 
cast(1 as decimal(3, 0)) - cast(1 as tinyint) FROM t +-- !query 129 schema +struct<(CAST(1 AS DECIMAL(3,0)) - CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):decimal(4,0)> +-- !query 129 output +0 + + +-- !query 130 +SELECT cast(1 as decimal(5, 0)) - cast(1 as tinyint) FROM t +-- !query 130 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(6,0)) - CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(6,0))):decimal(6,0)> +-- !query 130 output +0 + + +-- !query 131 +SELECT cast(1 as decimal(10, 0)) - cast(1 as tinyint) FROM t +-- !query 131 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0)) - CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 131 output +0 + + +-- !query 132 +SELECT cast(1 as decimal(20, 0)) - cast(1 as tinyint) FROM t +-- !query 132 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0)) - CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 132 output +0 + + +-- !query 133 +SELECT cast(1 as decimal(3, 0)) - cast(1 as smallint) FROM t +-- !query 133 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(6,0)) - CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(6,0))):decimal(6,0)> +-- !query 133 output +0 + + +-- !query 134 +SELECT cast(1 as decimal(5, 0)) - cast(1 as smallint) FROM t +-- !query 134 schema +struct<(CAST(1 AS DECIMAL(5,0)) - CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):decimal(6,0)> +-- !query 134 output +0 + + +-- !query 135 +SELECT cast(1 as decimal(10, 0)) - cast(1 as smallint) FROM t +-- !query 135 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0)) - CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 135 output +0 + + +-- !query 136 +SELECT cast(1 as decimal(20, 0)) - cast(1 as smallint) FROM t +-- !query 136 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0)) - CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 136 output +0 + + +-- !query 137 +SELECT cast(1 as decimal(3, 0)) - cast(1 as int) FROM t +-- !query 137 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(11,0)) - CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 137 output +0 + + +-- !query 138 +SELECT cast(1 as decimal(5, 0)) - cast(1 as int) FROM t +-- !query 138 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(11,0)) - CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 138 output +0 + + +-- !query 139 +SELECT cast(1 as decimal(10, 0)) - cast(1 as int) FROM t +-- !query 139 schema +struct<(CAST(1 AS DECIMAL(10,0)) - CAST(CAST(1 AS INT) AS DECIMAL(10,0))):decimal(11,0)> +-- !query 139 output +0 + + +-- !query 140 +SELECT cast(1 as decimal(20, 0)) - cast(1 as int) FROM t +-- !query 140 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0)) - CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 140 output +0 + + +-- !query 141 +SELECT cast(1 as decimal(3, 0)) - cast(1 as bigint) FROM t +-- !query 141 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(21,0)) - CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 141 output +0 + + +-- !query 142 +SELECT cast(1 as decimal(5, 0)) - cast(1 as bigint) FROM t +-- !query 142 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(21,0)) - CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 142 output +0 + + +-- !query 143 +SELECT 
cast(1 as decimal(10, 0)) - cast(1 as bigint) FROM t +-- !query 143 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(21,0)) - CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 143 output +0 + + +-- !query 144 +SELECT cast(1 as decimal(20, 0)) - cast(1 as bigint) FROM t +-- !query 144 schema +struct<(CAST(1 AS DECIMAL(20,0)) - CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):decimal(21,0)> +-- !query 144 output +0 + + +-- !query 145 +SELECT cast(1 as decimal(3, 0)) - cast(1 as float) FROM t +-- !query 145 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) - CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 145 output +0.0 + + +-- !query 146 +SELECT cast(1 as decimal(5, 0)) - cast(1 as float) FROM t +-- !query 146 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) - CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 146 output +0.0 + + +-- !query 147 +SELECT cast(1 as decimal(10, 0)) - cast(1 as float) FROM t +-- !query 147 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) - CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 147 output +0.0 + + +-- !query 148 +SELECT cast(1 as decimal(20, 0)) - cast(1 as float) FROM t +-- !query 148 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) - CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 148 output +0.0 + + +-- !query 149 +SELECT cast(1 as decimal(3, 0)) - cast(1 as double) FROM t +-- !query 149 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) - CAST(1 AS DOUBLE)):double> +-- !query 149 output +0.0 + + +-- !query 150 +SELECT cast(1 as decimal(5, 0)) - cast(1 as double) FROM t +-- !query 150 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) - CAST(1 AS DOUBLE)):double> +-- !query 150 output +0.0 + + +-- !query 151 +SELECT cast(1 as decimal(10, 0)) - cast(1 as double) FROM t +-- !query 151 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) - CAST(1 AS DOUBLE)):double> +-- !query 151 output +0.0 + + +-- !query 152 +SELECT cast(1 as decimal(20, 0)) - cast(1 as double) FROM t +-- !query 152 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) - CAST(1 AS DOUBLE)):double> +-- !query 152 output +0.0 + + +-- !query 153 +SELECT cast(1 as decimal(3, 0)) - cast(1 as decimal(10, 0)) FROM t +-- !query 153 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(11,0)) - CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 153 output +0 + + +-- !query 154 +SELECT cast(1 as decimal(5, 0)) - cast(1 as decimal(10, 0)) FROM t +-- !query 154 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(11,0)) - CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 154 output +0 + + +-- !query 155 +SELECT cast(1 as decimal(10, 0)) - cast(1 as decimal(10, 0)) FROM t +-- !query 155 schema +struct<(CAST(1 AS DECIMAL(10,0)) - CAST(1 AS DECIMAL(10,0))):decimal(11,0)> +-- !query 155 output +0 + + +-- !query 156 +SELECT cast(1 as decimal(20, 0)) - cast(1 as decimal(10, 0)) FROM t +-- !query 156 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0)) - CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 156 output +0 + + +-- !query 157 +SELECT cast(1 as decimal(3, 0)) - cast(1 as string) FROM t +-- !query 157 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) - CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 157 output +0.0 + + +-- !query 158 +SELECT cast(1 as decimal(5, 0)) - cast(1 as string) FROM t +-- !query 158 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) - CAST(CAST(1 AS STRING) AS 
DOUBLE)):double> +-- !query 158 output +0.0 + + +-- !query 159 +SELECT cast(1 as decimal(10, 0)) - cast(1 as string) FROM t +-- !query 159 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) - CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 159 output +0.0 + + +-- !query 160 +SELECT cast(1 as decimal(20, 0)) - cast(1 as string) FROM t +-- !query 160 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) - CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 160 output +0.0 + + +-- !query 161 +SELECT cast(1 as decimal(3, 0)) - cast('1' as binary) FROM t +-- !query 161 schema +struct<> +-- !query 161 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) - CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) - CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 + + +-- !query 162 +SELECT cast(1 as decimal(5, 0)) - cast('1' as binary) FROM t +-- !query 162 schema +struct<> +-- !query 162 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) - CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) - CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 + + +-- !query 163 +SELECT cast(1 as decimal(10, 0)) - cast('1' as binary) FROM t +-- !query 163 schema +struct<> +-- !query 163 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) - CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) - CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 + + +-- !query 164 +SELECT cast(1 as decimal(20, 0)) - cast('1' as binary) FROM t +-- !query 164 schema +struct<> +-- !query 164 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) - CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) - CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 + + +-- !query 165 +SELECT cast(1 as decimal(3, 0)) - cast(1 as boolean) FROM t +-- !query 165 schema +struct<> +-- !query 165 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) - CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) - CAST(1 AS BOOLEAN))' (decimal(3,0) and boolean).; line 1 pos 7 + + +-- !query 166 +SELECT cast(1 as decimal(5, 0)) - cast(1 as boolean) FROM t +-- !query 166 schema +struct<> +-- !query 166 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) - CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) - CAST(1 AS BOOLEAN))' (decimal(5,0) and boolean).; line 1 pos 7 + + +-- !query 167 +SELECT cast(1 as decimal(10, 0)) - cast(1 as boolean) FROM t +-- !query 167 schema +struct<> +-- !query 167 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) - CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) - CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 + + +-- !query 168 +SELECT cast(1 as decimal(20, 0)) - cast(1 as boolean) FROM t +-- !query 168 schema +struct<> +-- !query 168 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) - CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) - CAST(1 AS BOOLEAN))' (decimal(20,0) and boolean).; line 1 pos 7 + + +-- !query 169 +SELECT 
cast(1 as decimal(3, 0)) - cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 169 schema +struct<> +-- !query 169 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) - CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) - CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 + + +-- !query 170 +SELECT cast(1 as decimal(5, 0)) - cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 170 schema +struct<> +-- !query 170 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) - CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) - CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 + + +-- !query 171 +SELECT cast(1 as decimal(10, 0)) - cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 171 schema +struct<> +-- !query 171 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) - CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) - CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 + + +-- !query 172 +SELECT cast(1 as decimal(20, 0)) - cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 172 schema +struct<> +-- !query 172 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) - CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) - CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 + + +-- !query 173 +SELECT cast(1 as decimal(3, 0)) - cast('2017-12-11 09:30:00' as date) FROM t +-- !query 173 schema +struct<> +-- !query 173 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) - CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) - CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 + + +-- !query 174 +SELECT cast(1 as decimal(5, 0)) - cast('2017-12-11 09:30:00' as date) FROM t +-- !query 174 schema +struct<> +-- !query 174 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) - CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) - CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 + + +-- !query 175 +SELECT cast(1 as decimal(10, 0)) - cast('2017-12-11 09:30:00' as date) FROM t +-- !query 175 schema +struct<> +-- !query 175 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) - CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) - CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 + + +-- !query 176 +SELECT cast(1 as decimal(20, 0)) - cast('2017-12-11 09:30:00' as date) FROM t +-- !query 176 schema +struct<> +-- !query 176 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) - CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) - CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 + + +-- !query 177 +SELECT cast(1 as tinyint) * cast(1 as decimal(3, 
0)) FROM t +-- !query 177 schema +struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) * CAST(1 AS DECIMAL(3,0))):decimal(7,0)> +-- !query 177 output +1 + + +-- !query 178 +SELECT cast(1 as tinyint) * cast(1 as decimal(5, 0)) FROM t +-- !query 178 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)) * CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0))):decimal(9,0)> +-- !query 178 output +1 + + +-- !query 179 +SELECT cast(1 as tinyint) * cast(1 as decimal(10, 0)) FROM t +-- !query 179 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) * CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(14,0)> +-- !query 179 output +1 + + +-- !query 180 +SELECT cast(1 as tinyint) * cast(1 as decimal(20, 0)) FROM t +-- !query 180 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)) * CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(24,0)> +-- !query 180 output +1 + + +-- !query 181 +SELECT cast(1 as smallint) * cast(1 as decimal(3, 0)) FROM t +-- !query 181 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)) * CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0))):decimal(9,0)> +-- !query 181 output +1 + + +-- !query 182 +SELECT cast(1 as smallint) * cast(1 as decimal(5, 0)) FROM t +-- !query 182 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) * CAST(1 AS DECIMAL(5,0))):decimal(11,0)> +-- !query 182 output +1 + + +-- !query 183 +SELECT cast(1 as smallint) * cast(1 as decimal(10, 0)) FROM t +-- !query 183 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) * CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(16,0)> +-- !query 183 output +1 + + +-- !query 184 +SELECT cast(1 as smallint) * cast(1 as decimal(20, 0)) FROM t +-- !query 184 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)) * CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(26,0)> +-- !query 184 output +1 + + +-- !query 185 +SELECT cast(1 as int) * cast(1 as decimal(3, 0)) FROM t +-- !query 185 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) * CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):decimal(14,0)> +-- !query 185 output +1 + + +-- !query 186 +SELECT cast(1 as int) * cast(1 as decimal(5, 0)) FROM t +-- !query 186 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) * CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):decimal(16,0)> +-- !query 186 output +1 + + +-- !query 187 +SELECT cast(1 as int) * cast(1 as decimal(10, 0)) FROM t +-- !query 187 schema +struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) * CAST(1 AS DECIMAL(10,0))):decimal(21,0)> +-- !query 187 output +1 + + +-- !query 188 +SELECT cast(1 as int) * cast(1 as decimal(20, 0)) FROM t +-- !query 188 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)) * CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(31,0)> +-- !query 188 output +1 + + +-- !query 189 +SELECT cast(1 as bigint) * cast(1 as decimal(3, 0)) FROM t +-- !query 189 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) * CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0))):decimal(24,0)> +-- !query 189 output +1 + + +-- !query 190 +SELECT cast(1 as bigint) * cast(1 as decimal(5, 0)) FROM t +-- !query 190 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) * CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0))):decimal(26,0)> +-- !query 190 output +1 + + +-- !query 191 +SELECT cast(1 as bigint) * cast(1 as decimal(10, 
0)) FROM t +-- !query 191 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) * CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):decimal(31,0)> +-- !query 191 output +1 + + +-- !query 192 +SELECT cast(1 as bigint) * cast(1 as decimal(20, 0)) FROM t +-- !query 192 schema +struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) * CAST(1 AS DECIMAL(20,0))):decimal(38,0)> +-- !query 192 output +1 + + +-- !query 193 +SELECT cast(1 as float) * cast(1 as decimal(3, 0)) FROM t +-- !query 193 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) * CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):double> +-- !query 193 output +1.0 + + +-- !query 194 +SELECT cast(1 as float) * cast(1 as decimal(5, 0)) FROM t +-- !query 194 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) * CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):double> +-- !query 194 output +1.0 + + +-- !query 195 +SELECT cast(1 as float) * cast(1 as decimal(10, 0)) FROM t +-- !query 195 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) * CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> +-- !query 195 output +1.0 + + +-- !query 196 +SELECT cast(1 as float) * cast(1 as decimal(20, 0)) FROM t +-- !query 196 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) * CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):double> +-- !query 196 output +1.0 + + +-- !query 197 +SELECT cast(1 as double) * cast(1 as decimal(3, 0)) FROM t +-- !query 197 schema +struct<(CAST(1 AS DOUBLE) * CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):double> +-- !query 197 output +1.0 + + +-- !query 198 +SELECT cast(1 as double) * cast(1 as decimal(5, 0)) FROM t +-- !query 198 schema +struct<(CAST(1 AS DOUBLE) * CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):double> +-- !query 198 output +1.0 + + +-- !query 199 +SELECT cast(1 as double) * cast(1 as decimal(10, 0)) FROM t +-- !query 199 schema +struct<(CAST(1 AS DOUBLE) * CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> +-- !query 199 output +1.0 + + +-- !query 200 +SELECT cast(1 as double) * cast(1 as decimal(20, 0)) FROM t +-- !query 200 schema +struct<(CAST(1 AS DOUBLE) * CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):double> +-- !query 200 output +1.0 + + +-- !query 201 +SELECT cast(1 as decimal(10, 0)) * cast(1 as decimal(3, 0)) FROM t +-- !query 201 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) * CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):decimal(14,0)> +-- !query 201 output +1 + + +-- !query 202 +SELECT cast(1 as decimal(10, 0)) * cast(1 as decimal(5, 0)) FROM t +-- !query 202 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) * CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):decimal(16,0)> +-- !query 202 output +1 + + +-- !query 203 +SELECT cast(1 as decimal(10, 0)) * cast(1 as decimal(10, 0)) FROM t +-- !query 203 schema +struct<(CAST(1 AS DECIMAL(10,0)) * CAST(1 AS DECIMAL(10,0))):decimal(21,0)> +-- !query 203 output +1 + + +-- !query 204 +SELECT cast(1 as decimal(10, 0)) * cast(1 as decimal(20, 0)) FROM t +-- !query 204 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) * CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(31,0)> +-- !query 204 output +1 + + +-- !query 205 +SELECT cast('1' as binary) * cast(1 as decimal(3, 0)) FROM t +-- !query 205 schema +struct<> +-- !query 205 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) * CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) * CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 + + +-- !query 206 +SELECT cast('1' as binary) * cast(1 as 
decimal(5, 0)) FROM t +-- !query 206 schema +struct<> +-- !query 206 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) * CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) * CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 + + +-- !query 207 +SELECT cast('1' as binary) * cast(1 as decimal(10, 0)) FROM t +-- !query 207 schema +struct<> +-- !query 207 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) * CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) * CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 + + +-- !query 208 +SELECT cast('1' as binary) * cast(1 as decimal(20, 0)) FROM t +-- !query 208 schema +struct<> +-- !query 208 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) * CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) * CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 + + +-- !query 209 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) * cast(1 as decimal(3, 0)) FROM t +-- !query 209 schema +struct<> +-- !query 209 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) * CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) * CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 + + +-- !query 210 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) * cast(1 as decimal(5, 0)) FROM t +-- !query 210 schema +struct<> +-- !query 210 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) * CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) * CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 + + +-- !query 211 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) * cast(1 as decimal(10, 0)) FROM t +-- !query 211 schema +struct<> +-- !query 211 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) * CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) * CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 + + +-- !query 212 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) * cast(1 as decimal(20, 0)) FROM t +-- !query 212 schema +struct<> +-- !query 212 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) * CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) * CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 + + +-- !query 213 +SELECT cast('2017-12-11 09:30:00' as date) * cast(1 as decimal(3, 0)) FROM t +-- !query 213 schema +struct<> +-- !query 213 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) * CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) * CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 + + +-- !query 214 +SELECT cast('2017-12-11 09:30:00' as date) * cast(1 as decimal(5, 0)) FROM t +-- !query 214 schema +struct<> +-- !query 214 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS 
DATE) * CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) * CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 + + +-- !query 215 +SELECT cast('2017-12-11 09:30:00' as date) * cast(1 as decimal(10, 0)) FROM t +-- !query 215 schema +struct<> +-- !query 215 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) * CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) * CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 + + +-- !query 216 +SELECT cast('2017-12-11 09:30:00' as date) * cast(1 as decimal(20, 0)) FROM t +-- !query 216 schema +struct<> +-- !query 216 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) * CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) * CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 + + +-- !query 217 +SELECT cast(1 as decimal(3, 0)) * cast(1 as tinyint) FROM t +-- !query 217 schema +struct<(CAST(1 AS DECIMAL(3,0)) * CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):decimal(7,0)> +-- !query 217 output +1 + + +-- !query 218 +SELECT cast(1 as decimal(5, 0)) * cast(1 as tinyint) FROM t +-- !query 218 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)) * CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0))):decimal(9,0)> +-- !query 218 output +1 + + +-- !query 219 +SELECT cast(1 as decimal(10, 0)) * cast(1 as tinyint) FROM t +-- !query 219 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) * CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0))):decimal(14,0)> +-- !query 219 output +1 + + +-- !query 220 +SELECT cast(1 as decimal(20, 0)) * cast(1 as tinyint) FROM t +-- !query 220 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) * CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0))):decimal(24,0)> +-- !query 220 output +1 + + +-- !query 221 +SELECT cast(1 as decimal(3, 0)) * cast(1 as smallint) FROM t +-- !query 221 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)) * CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0))):decimal(9,0)> +-- !query 221 output +1 + + +-- !query 222 +SELECT cast(1 as decimal(5, 0)) * cast(1 as smallint) FROM t +-- !query 222 schema +struct<(CAST(1 AS DECIMAL(5,0)) * CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):decimal(11,0)> +-- !query 222 output +1 + + +-- !query 223 +SELECT cast(1 as decimal(10, 0)) * cast(1 as smallint) FROM t +-- !query 223 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) * CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0))):decimal(16,0)> +-- !query 223 output +1 + + +-- !query 224 +SELECT cast(1 as decimal(20, 0)) * cast(1 as smallint) FROM t +-- !query 224 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) * CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0))):decimal(26,0)> +-- !query 224 output +1 + + +-- !query 225 +SELECT cast(1 as decimal(3, 0)) * cast(1 as int) FROM t +-- !query 225 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) * CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(14,0)> +-- !query 225 output +1 + + +-- !query 226 +SELECT cast(1 as decimal(5, 0)) * cast(1 as int) FROM t +-- !query 226 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) * CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS 
DECIMAL(10,0))):decimal(16,0)> +-- !query 226 output +1 + + +-- !query 227 +SELECT cast(1 as decimal(10, 0)) * cast(1 as int) FROM t +-- !query 227 schema +struct<(CAST(1 AS DECIMAL(10,0)) * CAST(CAST(1 AS INT) AS DECIMAL(10,0))):decimal(21,0)> +-- !query 227 output +1 + + +-- !query 228 +SELECT cast(1 as decimal(20, 0)) * cast(1 as int) FROM t +-- !query 228 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) * CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0))):decimal(31,0)> +-- !query 228 output +1 + + +-- !query 229 +SELECT cast(1 as decimal(3, 0)) * cast(1 as bigint) FROM t +-- !query 229 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)) * CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(24,0)> +-- !query 229 output +1 + + +-- !query 230 +SELECT cast(1 as decimal(5, 0)) * cast(1 as bigint) FROM t +-- !query 230 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)) * CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(26,0)> +-- !query 230 output +1 + + +-- !query 231 +SELECT cast(1 as decimal(10, 0)) * cast(1 as bigint) FROM t +-- !query 231 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) * CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(31,0)> +-- !query 231 output +1 + + +-- !query 232 +SELECT cast(1 as decimal(20, 0)) * cast(1 as bigint) FROM t +-- !query 232 schema +struct<(CAST(1 AS DECIMAL(20,0)) * CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):decimal(38,0)> +-- !query 232 output +1 + + +-- !query 233 +SELECT cast(1 as decimal(3, 0)) * cast(1 as float) FROM t +-- !query 233 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) * CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 233 output +1.0 + + +-- !query 234 +SELECT cast(1 as decimal(5, 0)) * cast(1 as float) FROM t +-- !query 234 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) * CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 234 output +1.0 + + +-- !query 235 +SELECT cast(1 as decimal(10, 0)) * cast(1 as float) FROM t +-- !query 235 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) * CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 235 output +1.0 + + +-- !query 236 +SELECT cast(1 as decimal(20, 0)) * cast(1 as float) FROM t +-- !query 236 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) * CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 236 output +1.0 + + +-- !query 237 +SELECT cast(1 as decimal(3, 0)) * cast(1 as double) FROM t +-- !query 237 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) * CAST(1 AS DOUBLE)):double> +-- !query 237 output +1.0 + + +-- !query 238 +SELECT cast(1 as decimal(5, 0)) * cast(1 as double) FROM t +-- !query 238 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) * CAST(1 AS DOUBLE)):double> +-- !query 238 output +1.0 + + +-- !query 239 +SELECT cast(1 as decimal(10, 0)) * cast(1 as double) FROM t +-- !query 239 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) * CAST(1 AS DOUBLE)):double> +-- !query 239 output +1.0 + + +-- !query 240 +SELECT cast(1 as decimal(20, 0)) * cast(1 as double) FROM t +-- !query 240 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) * CAST(1 AS DOUBLE)):double> +-- !query 240 output +1.0 + + +-- !query 241 +SELECT cast(1 as decimal(3, 0)) * cast(1 as decimal(10, 0)) FROM t +-- !query 241 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) * CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(14,0)> +-- !query 241 output +1 + + +-- !query 242 +SELECT cast(1 as 
decimal(5, 0)) * cast(1 as decimal(10, 0)) FROM t +-- !query 242 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) * CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(16,0)> +-- !query 242 output +1 + + +-- !query 243 +SELECT cast(1 as decimal(10, 0)) * cast(1 as decimal(10, 0)) FROM t +-- !query 243 schema +struct<(CAST(1 AS DECIMAL(10,0)) * CAST(1 AS DECIMAL(10,0))):decimal(21,0)> +-- !query 243 output +1 + + +-- !query 244 +SELECT cast(1 as decimal(20, 0)) * cast(1 as decimal(10, 0)) FROM t +-- !query 244 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) * CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):decimal(31,0)> +-- !query 244 output +1 + + +-- !query 245 +SELECT cast(1 as decimal(3, 0)) * cast(1 as string) FROM t +-- !query 245 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) * CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 245 output +1.0 + + +-- !query 246 +SELECT cast(1 as decimal(5, 0)) * cast(1 as string) FROM t +-- !query 246 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) * CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 246 output +1.0 + + +-- !query 247 +SELECT cast(1 as decimal(10, 0)) * cast(1 as string) FROM t +-- !query 247 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) * CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 247 output +1.0 + + +-- !query 248 +SELECT cast(1 as decimal(20, 0)) * cast(1 as string) FROM t +-- !query 248 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) * CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 248 output +1.0 + + +-- !query 249 +SELECT cast(1 as decimal(3, 0)) * cast('1' as binary) FROM t +-- !query 249 schema +struct<> +-- !query 249 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) * CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) * CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 + + +-- !query 250 +SELECT cast(1 as decimal(5, 0)) * cast('1' as binary) FROM t +-- !query 250 schema +struct<> +-- !query 250 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) * CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) * CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 + + +-- !query 251 +SELECT cast(1 as decimal(10, 0)) * cast('1' as binary) FROM t +-- !query 251 schema +struct<> +-- !query 251 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) * CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) * CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 + + +-- !query 252 +SELECT cast(1 as decimal(20, 0)) * cast('1' as binary) FROM t +-- !query 252 schema +struct<> +-- !query 252 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) * CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) * CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 + + +-- !query 253 +SELECT cast(1 as decimal(3, 0)) * cast(1 as boolean) FROM t +-- !query 253 schema +struct<> +-- !query 253 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) * CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) * CAST(1 AS BOOLEAN))' (decimal(3,0) and boolean).; line 1 pos 7 + + +-- !query 254 +SELECT cast(1 as decimal(5, 0)) * cast(1 as boolean) FROM 
t +-- !query 254 schema +struct<> +-- !query 254 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) * CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) * CAST(1 AS BOOLEAN))' (decimal(5,0) and boolean).; line 1 pos 7 + + +-- !query 255 +SELECT cast(1 as decimal(10, 0)) * cast(1 as boolean) FROM t +-- !query 255 schema +struct<> +-- !query 255 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) * CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) * CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 + + +-- !query 256 +SELECT cast(1 as decimal(20, 0)) * cast(1 as boolean) FROM t +-- !query 256 schema +struct<> +-- !query 256 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) * CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) * CAST(1 AS BOOLEAN))' (decimal(20,0) and boolean).; line 1 pos 7 + + +-- !query 257 +SELECT cast(1 as decimal(3, 0)) * cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 257 schema +struct<> +-- !query 257 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) * CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) * CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 + + +-- !query 258 +SELECT cast(1 as decimal(5, 0)) * cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 258 schema +struct<> +-- !query 258 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) * CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) * CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 + + +-- !query 259 +SELECT cast(1 as decimal(10, 0)) * cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 259 schema +struct<> +-- !query 259 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) * CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) * CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 + + +-- !query 260 +SELECT cast(1 as decimal(20, 0)) * cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 260 schema +struct<> +-- !query 260 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) * CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) * CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 + + +-- !query 261 +SELECT cast(1 as decimal(3, 0)) * cast('2017-12-11 09:30:00' as date) FROM t +-- !query 261 schema +struct<> +-- !query 261 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) * CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) * CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 + + +-- !query 262 +SELECT cast(1 as decimal(5, 0)) * cast('2017-12-11 09:30:00' as date) FROM t +-- !query 262 schema +struct<> +-- !query 262 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) * CAST('2017-12-11 09:30:00' AS 
DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) * CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 + + +-- !query 263 +SELECT cast(1 as decimal(10, 0)) * cast('2017-12-11 09:30:00' as date) FROM t +-- !query 263 schema +struct<> +-- !query 263 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) * CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) * CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 + + +-- !query 264 +SELECT cast(1 as decimal(20, 0)) * cast('2017-12-11 09:30:00' as date) FROM t +-- !query 264 schema +struct<> +-- !query 264 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) * CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) * CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 + + +-- !query 265 +SELECT cast(1 as tinyint) / cast(1 as decimal(3, 0)) FROM t +-- !query 265 schema +struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) / CAST(1 AS DECIMAL(3,0))):decimal(9,6)> +-- !query 265 output +1 + + +-- !query 266 +SELECT cast(1 as tinyint) / cast(1 as decimal(5, 0)) FROM t +-- !query 266 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)) / CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0))):decimal(9,6)> +-- !query 266 output +1 + + +-- !query 267 +SELECT cast(1 as tinyint) / cast(1 as decimal(10, 0)) FROM t +-- !query 267 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) / CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(14,11)> +-- !query 267 output +1 + + +-- !query 268 +SELECT cast(1 as tinyint) / cast(1 as decimal(20, 0)) FROM t +-- !query 268 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)) / CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(24,21)> +-- !query 268 output +1 + + +-- !query 269 +SELECT cast(1 as smallint) / cast(1 as decimal(3, 0)) FROM t +-- !query 269 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)) / CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0))):decimal(11,6)> +-- !query 269 output +1 + + +-- !query 270 +SELECT cast(1 as smallint) / cast(1 as decimal(5, 0)) FROM t +-- !query 270 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) / CAST(1 AS DECIMAL(5,0))):decimal(11,6)> +-- !query 270 output +1 + + +-- !query 271 +SELECT cast(1 as smallint) / cast(1 as decimal(10, 0)) FROM t +-- !query 271 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) / CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(16,11)> +-- !query 271 output +1 + + +-- !query 272 +SELECT cast(1 as smallint) / cast(1 as decimal(20, 0)) FROM t +-- !query 272 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)) / CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(26,21)> +-- !query 272 output +1 + + +-- !query 273 +SELECT cast(1 as int) / cast(1 as decimal(3, 0)) FROM t +-- !query 273 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) / CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):decimal(16,6)> +-- !query 273 output +1 + + +-- !query 274 +SELECT cast(1 as int) / cast(1 as decimal(5, 0)) FROM t +-- !query 274 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) / CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):decimal(16,6)> +-- 
!query 274 output +1 + + +-- !query 275 +SELECT cast(1 as int) / cast(1 as decimal(10, 0)) FROM t +-- !query 275 schema +struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) / CAST(1 AS DECIMAL(10,0))):decimal(21,11)> +-- !query 275 output +1 + + +-- !query 276 +SELECT cast(1 as int) / cast(1 as decimal(20, 0)) FROM t +-- !query 276 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)) / CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(31,21)> +-- !query 276 output +1 + + +-- !query 277 +SELECT cast(1 as bigint) / cast(1 as decimal(3, 0)) FROM t +-- !query 277 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) / CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0))):decimal(26,6)> +-- !query 277 output +1 + + +-- !query 278 +SELECT cast(1 as bigint) / cast(1 as decimal(5, 0)) FROM t +-- !query 278 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) / CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0))):decimal(26,6)> +-- !query 278 output +1 + + +-- !query 279 +SELECT cast(1 as bigint) / cast(1 as decimal(10, 0)) FROM t +-- !query 279 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) / CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):decimal(31,11)> +-- !query 279 output +1 + + +-- !query 280 +SELECT cast(1 as bigint) / cast(1 as decimal(20, 0)) FROM t +-- !query 280 schema +struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) / CAST(1 AS DECIMAL(20,0))):decimal(38,19)> +-- !query 280 output +1 + + +-- !query 281 +SELECT cast(1 as float) / cast(1 as decimal(3, 0)) FROM t +-- !query 281 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) AS DOUBLE)):double> +-- !query 281 output +1.0 + + +-- !query 282 +SELECT cast(1 as float) / cast(1 as decimal(5, 0)) FROM t +-- !query 282 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) AS DOUBLE)):double> +-- !query 282 output +1.0 + + +-- !query 283 +SELECT cast(1 as float) / cast(1 as decimal(10, 0)) FROM t +-- !query 283 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) AS DOUBLE)):double> +-- !query 283 output +1.0 + + +-- !query 284 +SELECT cast(1 as float) / cast(1 as decimal(20, 0)) FROM t +-- !query 284 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) AS DOUBLE)):double> +-- !query 284 output +1.0 + + +-- !query 285 +SELECT cast(1 as double) / cast(1 as decimal(3, 0)) FROM t +-- !query 285 schema +struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):double> +-- !query 285 output +1.0 + + +-- !query 286 +SELECT cast(1 as double) / cast(1 as decimal(5, 0)) FROM t +-- !query 286 schema +struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):double> +-- !query 286 output +1.0 + + +-- !query 287 +SELECT cast(1 as double) / cast(1 as decimal(10, 0)) FROM t +-- !query 287 schema +struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> +-- !query 287 output +1.0 + + +-- !query 288 +SELECT cast(1 as double) / cast(1 as decimal(20, 0)) FROM t +-- !query 288 schema +struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):double> +-- !query 288 output +1.0 + + +-- !query 289 +SELECT cast(1 as decimal(10, 0)) / cast(1 as decimal(3, 0)) FROM t +-- !query 289 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) / CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):decimal(16,6)> +-- !query 289 output +1 + + +-- 
!query 290 +SELECT cast(1 as decimal(10, 0)) / cast(1 as decimal(5, 0)) FROM t +-- !query 290 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) / CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):decimal(16,6)> +-- !query 290 output +1 + + +-- !query 291 +SELECT cast(1 as decimal(10, 0)) / cast(1 as decimal(10, 0)) FROM t +-- !query 291 schema +struct<(CAST(1 AS DECIMAL(10,0)) / CAST(1 AS DECIMAL(10,0))):decimal(21,11)> +-- !query 291 output +1 + + +-- !query 292 +SELECT cast(1 as decimal(10, 0)) / cast(1 as decimal(20, 0)) FROM t +-- !query 292 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) / CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(31,21)> +-- !query 292 output +1 + + +-- !query 293 +SELECT cast('1' as binary) / cast(1 as decimal(3, 0)) FROM t +-- !query 293 schema +struct<> +-- !query 293 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 + + +-- !query 294 +SELECT cast('1' as binary) / cast(1 as decimal(5, 0)) FROM t +-- !query 294 schema +struct<> +-- !query 294 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 + + +-- !query 295 +SELECT cast('1' as binary) / cast(1 as decimal(10, 0)) FROM t +-- !query 295 schema +struct<> +-- !query 295 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 + + +-- !query 296 +SELECT cast('1' as binary) / cast(1 as decimal(20, 0)) FROM t +-- !query 296 schema +struct<> +-- !query 296 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 + + +-- !query 297 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as decimal(3, 0)) FROM t +-- !query 297 schema +struct<> +-- !query 297 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 + + +-- !query 298 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as decimal(5, 0)) FROM t +-- !query 298 schema +struct<> +-- !query 298 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 + + +-- !query 299 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as decimal(10, 0)) FROM t +-- !query 299 schema +struct<> +-- !query 299 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DECIMAL(10,0)))'
(timestamp and decimal(10,0)).; line 1 pos 7 + + +-- !query 300 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as decimal(20, 0)) FROM t +-- !query 300 schema +struct<> +-- !query 300 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 + + +-- !query 301 +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as decimal(3, 0)) FROM t +-- !query 301 schema +struct<> +-- !query 301 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 + + +-- !query 302 +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as decimal(5, 0)) FROM t +-- !query 302 schema +struct<> +-- !query 302 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 + + +-- !query 303 +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as decimal(10, 0)) FROM t +-- !query 303 schema +struct<> +-- !query 303 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 + + +-- !query 304 +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as decimal(20, 0)) FROM t +-- !query 304 schema +struct<> +-- !query 304 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 + + +-- !query 305 +SELECT cast(1 as decimal(3, 0)) / cast(1 as tinyint) FROM t +-- !query 305 schema +struct<(CAST(1 AS DECIMAL(3,0)) / CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):decimal(9,6)> +-- !query 305 output +1 + + +-- !query 306 +SELECT cast(1 as decimal(5, 0)) / cast(1 as tinyint) FROM t +-- !query 306 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)) / CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0))):decimal(11,6)> +-- !query 306 output +1 + + +-- !query 307 +SELECT cast(1 as decimal(10, 0)) / cast(1 as tinyint) FROM t +-- !query 307 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) / CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0))):decimal(16,6)> +-- !query 307 output +1 + + +-- !query 308 +SELECT cast(1 as decimal(20, 0)) / cast(1 as tinyint) FROM t +-- !query 308 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) / CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0))):decimal(26,6)> +-- !query 308 output +1 + + +-- !query 309 +SELECT cast(1 as decimal(3, 0)) / cast(1 as smallint) FROM t +-- !query 309 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)) / CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0))):decimal(9,6)> +-- !query 309 output +1 + + +-- !query 310 +SELECT cast(1 as decimal(5, 0)) / cast(1 as
smallint) FROM t +-- !query 310 schema +struct<(CAST(1 AS DECIMAL(5,0)) / CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):decimal(11,6)> +-- !query 310 output +1 + + +-- !query 311 +SELECT cast(1 as decimal(10, 0)) / cast(1 as smallint) FROM t +-- !query 311 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) / CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0))):decimal(16,6)> +-- !query 311 output +1 + + +-- !query 312 +SELECT cast(1 as decimal(20, 0)) / cast(1 as smallint) FROM t +-- !query 312 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) / CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0))):decimal(26,6)> +-- !query 312 output +1 + + +-- !query 313 +SELECT cast(1 as decimal(3, 0)) / cast(1 as int) FROM t +-- !query 313 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) / CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(14,11)> +-- !query 313 output +1 + + +-- !query 314 +SELECT cast(1 as decimal(5, 0)) / cast(1 as int) FROM t +-- !query 314 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) / CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(16,11)> +-- !query 314 output +1 + + +-- !query 315 +SELECT cast(1 as decimal(10, 0)) / cast(1 as int) FROM t +-- !query 315 schema +struct<(CAST(1 AS DECIMAL(10,0)) / CAST(CAST(1 AS INT) AS DECIMAL(10,0))):decimal(21,11)> +-- !query 315 output +1 + + +-- !query 316 +SELECT cast(1 as decimal(20, 0)) / cast(1 as int) FROM t +-- !query 316 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) / CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0))):decimal(31,11)> +-- !query 316 output +1 + + +-- !query 317 +SELECT cast(1 as decimal(3, 0)) / cast(1 as bigint) FROM t +-- !query 317 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)) / CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(24,21)> +-- !query 317 output +1 + + +-- !query 318 +SELECT cast(1 as decimal(5, 0)) / cast(1 as bigint) FROM t +-- !query 318 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)) / CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(26,21)> +-- !query 318 output +1 + + +-- !query 319 +SELECT cast(1 as decimal(10, 0)) / cast(1 as bigint) FROM t +-- !query 319 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) / CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(31,21)> +-- !query 319 output +1 + + +-- !query 320 +SELECT cast(1 as decimal(20, 0)) / cast(1 as bigint) FROM t +-- !query 320 schema +struct<(CAST(1 AS DECIMAL(20,0)) / CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):decimal(38,19)> +-- !query 320 output +1 + + +-- !query 321 +SELECT cast(1 as decimal(3, 0)) / cast(1 as float) FROM t +-- !query 321 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 321 output +1.0 + + +-- !query 322 +SELECT cast(1 as decimal(5, 0)) / cast(1 as float) FROM t +-- !query 322 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 322 output +1.0 + + +-- !query 323 +SELECT cast(1 as decimal(10, 0)) / cast(1 as float) FROM t +-- !query 323 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 323 output +1.0 + + +-- !query 324 +SELECT cast(1 as decimal(20, 0)) / cast(1 as float) FROM t +-- !query 324 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS 
DOUBLE)):double> +-- !query 324 output +1.0 + + +-- !query 325 +SELECT cast(1 as decimal(3, 0)) / cast(1 as double) FROM t +-- !query 325 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) / CAST(1 AS DOUBLE)):double> +-- !query 325 output +1.0 + + +-- !query 326 +SELECT cast(1 as decimal(5, 0)) / cast(1 as double) FROM t +-- !query 326 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) / CAST(1 AS DOUBLE)):double> +-- !query 326 output +1.0 + + +-- !query 327 +SELECT cast(1 as decimal(10, 0)) / cast(1 as double) FROM t +-- !query 327 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) / CAST(1 AS DOUBLE)):double> +-- !query 327 output +1.0 + + +-- !query 328 +SELECT cast(1 as decimal(20, 0)) / cast(1 as double) FROM t +-- !query 328 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) / CAST(1 AS DOUBLE)):double> +-- !query 328 output +1.0 + + +-- !query 329 +SELECT cast(1 as decimal(3, 0)) / cast(1 as decimal(10, 0)) FROM t +-- !query 329 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) / CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(14,11)> +-- !query 329 output +1 + + +-- !query 330 +SELECT cast(1 as decimal(5, 0)) / cast(1 as decimal(10, 0)) FROM t +-- !query 330 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) / CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(16,11)> +-- !query 330 output +1 + + +-- !query 331 +SELECT cast(1 as decimal(10, 0)) / cast(1 as decimal(10, 0)) FROM t +-- !query 331 schema +struct<(CAST(1 AS DECIMAL(10,0)) / CAST(1 AS DECIMAL(10,0))):decimal(21,11)> +-- !query 331 output +1 + + +-- !query 332 +SELECT cast(1 as decimal(20, 0)) / cast(1 as decimal(10, 0)) FROM t +-- !query 332 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) / CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):decimal(31,11)> +-- !query 332 output +1 + + +-- !query 333 +SELECT cast(1 as decimal(3, 0)) / cast(1 as string) FROM t +-- !query 333 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) / CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 333 output +1.0 + + +-- !query 334 +SELECT cast(1 as decimal(5, 0)) / cast(1 as string) FROM t +-- !query 334 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) / CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 334 output +1.0 + + +-- !query 335 +SELECT cast(1 as decimal(10, 0)) / cast(1 as string) FROM t +-- !query 335 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) / CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 335 output +1.0 + + +-- !query 336 +SELECT cast(1 as decimal(20, 0)) / cast(1 as string) FROM t +-- !query 336 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) / CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 336 output +1.0 + + +-- !query 337 +SELECT cast(1 as decimal(3, 0)) / cast('1' as binary) FROM t +-- !query 337 schema +struct<> +-- !query 337 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) / CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 + + +-- !query 338 +SELECT cast(1 as decimal(5, 0)) / cast('1' as binary) FROM t +-- !query 338 schema +struct<> +-- !query 338 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) / CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 + + +-- !query 339 +SELECT cast(1 as decimal(10, 
0)) / cast('1' as binary) FROM t +-- !query 339 schema +struct<> +-- !query 339 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) / CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 + + +-- !query 340 +SELECT cast(1 as decimal(20, 0)) / cast('1' as binary) FROM t +-- !query 340 schema +struct<> +-- !query 340 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) / CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 + + +-- !query 341 +SELECT cast(1 as decimal(3, 0)) / cast(1 as boolean) FROM t +-- !query 341 schema +struct<> +-- !query 341 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) / CAST(1 AS BOOLEAN))' (decimal(3,0) and boolean).; line 1 pos 7 + + +-- !query 342 +SELECT cast(1 as decimal(5, 0)) / cast(1 as boolean) FROM t +-- !query 342 schema +struct<> +-- !query 342 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) / CAST(1 AS BOOLEAN))' (decimal(5,0) and boolean).; line 1 pos 7 + + +-- !query 343 +SELECT cast(1 as decimal(10, 0)) / cast(1 as boolean) FROM t +-- !query 343 schema +struct<> +-- !query 343 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) / CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 + + +-- !query 344 +SELECT cast(1 as decimal(20, 0)) / cast(1 as boolean) FROM t +-- !query 344 schema +struct<> +-- !query 344 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) / CAST(1 AS BOOLEAN))' (decimal(20,0) and boolean).; line 1 pos 7 + + +-- !query 345 +SELECT cast(1 as decimal(3, 0)) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 345 schema +struct<> +-- !query 345 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 + + +-- !query 346 +SELECT cast(1 as decimal(5, 0)) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 346 schema +struct<> +-- !query 346 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 + + +-- !query 347 +SELECT cast(1 as decimal(10, 0)) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 347 schema +struct<> +-- !query 347 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and
timestamp).; line 1 pos 7 + + +-- !query 348 +SELECT cast(1 as decimal(20, 0)) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 348 schema +struct<> +-- !query 348 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 + + +-- !query 349 +SELECT cast(1 as decimal(3, 0)) / cast('2017-12-11 09:30:00' as date) FROM t +-- !query 349 schema +struct<> +-- !query 349 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) / CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 + + +-- !query 350 +SELECT cast(1 as decimal(5, 0)) / cast('2017-12-11 09:30:00' as date) FROM t +-- !query 350 schema +struct<> +-- !query 350 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) / CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 + + +-- !query 351 +SELECT cast(1 as decimal(10, 0)) / cast('2017-12-11 09:30:00' as date) FROM t +-- !query 351 schema +struct<> +-- !query 351 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) / CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 + + +-- !query 352 +SELECT cast(1 as decimal(20, 0)) / cast('2017-12-11 09:30:00' as date) FROM t +-- !query 352 schema +struct<> +-- !query 352 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) / CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 + + +-- !query 353 +SELECT cast(1 as tinyint) % cast(1 as decimal(3, 0)) FROM t +-- !query 353 schema +struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) % CAST(1 AS DECIMAL(3,0))):decimal(3,0)> +-- !query 353 output +0 + + +-- !query 354 +SELECT cast(1 as tinyint) % cast(1 as decimal(5, 0)) FROM t +-- !query 354 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)) % CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0))):decimal(3,0)> +-- !query 354 output +0 + + +-- !query 355 +SELECT cast(1 as tinyint) % cast(1 as decimal(10, 0)) FROM t +-- !query 355 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) % CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(3,0)> +-- !query 355 output +0 + + +-- !query 356 +SELECT cast(1 as tinyint) % cast(1 as decimal(20, 0)) FROM t +-- !query 356 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)) % CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(3,0)> +-- !query 356 output +0 + + +-- !query 357 +SELECT cast(1 as smallint) % cast(1 as decimal(3, 0)) FROM t +-- !query 357 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)) % CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0))):decimal(3,0)> +-- !query 357 output +0 + + +-- !query 358 +SELECT cast(1 as smallint) % cast(1 as decimal(5, 0)) FROM t +-- !query
358 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) % CAST(1 AS DECIMAL(5,0))):decimal(5,0)> +-- !query 358 output +0 + + +-- !query 359 +SELECT cast(1 as smallint) % cast(1 as decimal(10, 0)) FROM t +-- !query 359 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) % CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(5,0)> +-- !query 359 output +0 + + +-- !query 360 +SELECT cast(1 as smallint) % cast(1 as decimal(20, 0)) FROM t +-- !query 360 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)) % CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(5,0)> +-- !query 360 output +0 + + +-- !query 361 +SELECT cast(1 as int) % cast(1 as decimal(3, 0)) FROM t +-- !query 361 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) % CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):decimal(3,0)> +-- !query 361 output +0 + + +-- !query 362 +SELECT cast(1 as int) % cast(1 as decimal(5, 0)) FROM t +-- !query 362 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) % CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):decimal(5,0)> +-- !query 362 output +0 + + +-- !query 363 +SELECT cast(1 as int) % cast(1 as decimal(10, 0)) FROM t +-- !query 363 schema +struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) % CAST(1 AS DECIMAL(10,0))):decimal(10,0)> +-- !query 363 output +0 + + +-- !query 364 +SELECT cast(1 as int) % cast(1 as decimal(20, 0)) FROM t +-- !query 364 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)) % CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(10,0)> +-- !query 364 output +0 + + +-- !query 365 +SELECT cast(1 as bigint) % cast(1 as decimal(3, 0)) FROM t +-- !query 365 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) % CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0))):decimal(3,0)> +-- !query 365 output +0 + + +-- !query 366 +SELECT cast(1 as bigint) % cast(1 as decimal(5, 0)) FROM t +-- !query 366 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) % CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0))):decimal(5,0)> +-- !query 366 output +0 + + +-- !query 367 +SELECT cast(1 as bigint) % cast(1 as decimal(10, 0)) FROM t +-- !query 367 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) % CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):decimal(10,0)> +-- !query 367 output +0 + + +-- !query 368 +SELECT cast(1 as bigint) % cast(1 as decimal(20, 0)) FROM t +-- !query 368 schema +struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) % CAST(1 AS DECIMAL(20,0))):decimal(20,0)> +-- !query 368 output +0 + + +-- !query 369 +SELECT cast(1 as float) % cast(1 as decimal(3, 0)) FROM t +-- !query 369 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) % CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):double> +-- !query 369 output +0.0 + + +-- !query 370 +SELECT cast(1 as float) % cast(1 as decimal(5, 0)) FROM t +-- !query 370 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) % CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):double> +-- !query 370 output +0.0 + + +-- !query 371 +SELECT cast(1 as float) % cast(1 as decimal(10, 0)) FROM t +-- !query 371 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) % CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> +-- !query 371 output +0.0 + + +-- !query 372 +SELECT cast(1 as float) % cast(1 as decimal(20, 0)) FROM t +-- !query 372 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) % CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):double> +-- !query 372 output +0.0 + + 
+-- !query 373 +SELECT cast(1 as double) % cast(1 as decimal(3, 0)) FROM t +-- !query 373 schema +struct<(CAST(1 AS DOUBLE) % CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):double> +-- !query 373 output +0.0 + + +-- !query 374 +SELECT cast(1 as double) % cast(1 as decimal(5, 0)) FROM t +-- !query 374 schema +struct<(CAST(1 AS DOUBLE) % CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):double> +-- !query 374 output +0.0 + + +-- !query 375 +SELECT cast(1 as double) % cast(1 as decimal(10, 0)) FROM t +-- !query 375 schema +struct<(CAST(1 AS DOUBLE) % CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> +-- !query 375 output +0.0 + + +-- !query 376 +SELECT cast(1 as double) % cast(1 as decimal(20, 0)) FROM t +-- !query 376 schema +struct<(CAST(1 AS DOUBLE) % CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):double> +-- !query 376 output +0.0 + + +-- !query 377 +SELECT cast(1 as decimal(10, 0)) % cast(1 as decimal(3, 0)) FROM t +-- !query 377 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) % CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):decimal(3,0)> +-- !query 377 output +0 + + +-- !query 378 +SELECT cast(1 as decimal(10, 0)) % cast(1 as decimal(5, 0)) FROM t +-- !query 378 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) % CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):decimal(5,0)> +-- !query 378 output +0 + + +-- !query 379 +SELECT cast(1 as decimal(10, 0)) % cast(1 as decimal(10, 0)) FROM t +-- !query 379 schema +struct<(CAST(1 AS DECIMAL(10,0)) % CAST(1 AS DECIMAL(10,0))):decimal(10,0)> +-- !query 379 output +0 + + +-- !query 380 +SELECT cast(1 as decimal(10, 0)) % cast(1 as decimal(20, 0)) FROM t +-- !query 380 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) % CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(10,0)> +-- !query 380 output +0 + + +-- !query 381 +SELECT cast('1' as binary) % cast(1 as decimal(3, 0)) FROM t +-- !query 381 schema +struct<> +-- !query 381 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) % CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) % CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 + + +-- !query 382 +SELECT cast('1' as binary) % cast(1 as decimal(5, 0)) FROM t +-- !query 382 schema +struct<> +-- !query 382 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) % CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) % CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 + + +-- !query 383 +SELECT cast('1' as binary) % cast(1 as decimal(10, 0)) FROM t +-- !query 383 schema +struct<> +-- !query 383 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) % CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) % CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 + + +-- !query 384 +SELECT cast('1' as binary) % cast(1 as decimal(20, 0)) FROM t +-- !query 384 schema +struct<> +-- !query 384 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) % CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) % CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 + + +-- !query 385 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) % cast(1 as decimal(3, 0)) FROM t +-- !query 385 schema +struct<> +-- !query 385 output +org.apache.spark.sql.AnalysisException +cannot resolve 
'(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) % CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) % CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 + + +-- !query 386 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) % cast(1 as decimal(5, 0)) FROM t +-- !query 386 schema +struct<> +-- !query 386 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) % CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) % CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 + + +-- !query 387 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) % cast(1 as decimal(10, 0)) FROM t +-- !query 387 schema +struct<> +-- !query 387 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) % CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) % CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 + + +-- !query 388 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) % cast(1 as decimal(20, 0)) FROM t +-- !query 388 schema +struct<> +-- !query 388 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) % CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) % CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 + + +-- !query 389 +SELECT cast('2017-12-11 09:30:00' as date) % cast(1 as decimal(3, 0)) FROM t +-- !query 389 schema +struct<> +-- !query 389 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) % CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) % CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 + + +-- !query 390 +SELECT cast('2017-12-11 09:30:00' as date) % cast(1 as decimal(5, 0)) FROM t +-- !query 390 schema +struct<> +-- !query 390 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) % CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) % CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 + + +-- !query 391 +SELECT cast('2017-12-11 09:30:00' as date) % cast(1 as decimal(10, 0)) FROM t +-- !query 391 schema +struct<> +-- !query 391 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) % CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) % CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 + + +-- !query 392 +SELECT cast('2017-12-11 09:30:00' as date) % cast(1 as decimal(20, 0)) FROM t +-- !query 392 schema +struct<> +-- !query 392 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) % CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) % CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 + + +-- !query 393 +SELECT cast(1 as decimal(3, 0)) % cast(1 as tinyint) FROM t +-- !query 393 schema +struct<(CAST(1 AS DECIMAL(3,0)) % CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):decimal(3,0)> +-- !query 393 output +0 + + +-- !query 394 +SELECT cast(1 as 
decimal(5, 0)) % cast(1 as tinyint) FROM t +-- !query 394 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)) % CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0))):decimal(3,0)> +-- !query 394 output +0 + + +-- !query 395 +SELECT cast(1 as decimal(10, 0)) % cast(1 as tinyint) FROM t +-- !query 395 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) % CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0))):decimal(3,0)> +-- !query 395 output +0 + + +-- !query 396 +SELECT cast(1 as decimal(20, 0)) % cast(1 as tinyint) FROM t +-- !query 396 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) % CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0))):decimal(3,0)> +-- !query 396 output +0 + + +-- !query 397 +SELECT cast(1 as decimal(3, 0)) % cast(1 as smallint) FROM t +-- !query 397 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)) % CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0))):decimal(3,0)> +-- !query 397 output +0 + + +-- !query 398 +SELECT cast(1 as decimal(5, 0)) % cast(1 as smallint) FROM t +-- !query 398 schema +struct<(CAST(1 AS DECIMAL(5,0)) % CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):decimal(5,0)> +-- !query 398 output +0 + + +-- !query 399 +SELECT cast(1 as decimal(10, 0)) % cast(1 as smallint) FROM t +-- !query 399 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) % CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0))):decimal(5,0)> +-- !query 399 output +0 + + +-- !query 400 +SELECT cast(1 as decimal(20, 0)) % cast(1 as smallint) FROM t +-- !query 400 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) % CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0))):decimal(5,0)> +-- !query 400 output +0 + + +-- !query 401 +SELECT cast(1 as decimal(3, 0)) % cast(1 as int) FROM t +-- !query 401 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) % CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(3,0)> +-- !query 401 output +0 + + +-- !query 402 +SELECT cast(1 as decimal(5, 0)) % cast(1 as int) FROM t +-- !query 402 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) % CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(5,0)> +-- !query 402 output +0 + + +-- !query 403 +SELECT cast(1 as decimal(10, 0)) % cast(1 as int) FROM t +-- !query 403 schema +struct<(CAST(1 AS DECIMAL(10,0)) % CAST(CAST(1 AS INT) AS DECIMAL(10,0))):decimal(10,0)> +-- !query 403 output +0 + + +-- !query 404 +SELECT cast(1 as decimal(20, 0)) % cast(1 as int) FROM t +-- !query 404 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) % CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0))):decimal(10,0)> +-- !query 404 output +0 + + +-- !query 405 +SELECT cast(1 as decimal(3, 0)) % cast(1 as bigint) FROM t +-- !query 405 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)) % CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(3,0)> +-- !query 405 output +0 + + +-- !query 406 +SELECT cast(1 as decimal(5, 0)) % cast(1 as bigint) FROM t +-- !query 406 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)) % CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(5,0)> +-- !query 406 output +0 + + +-- !query 407 +SELECT cast(1 as decimal(10, 0)) % cast(1 as bigint) FROM t +-- !query 407 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) % CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(10,0)> +-- !query 407 output 
+0 + + +-- !query 408 +SELECT cast(1 as decimal(20, 0)) % cast(1 as bigint) FROM t +-- !query 408 schema +struct<(CAST(1 AS DECIMAL(20,0)) % CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):decimal(20,0)> +-- !query 408 output +0 + + +-- !query 409 +SELECT cast(1 as decimal(3, 0)) % cast(1 as float) FROM t +-- !query 409 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) % CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 409 output +0.0 + + +-- !query 410 +SELECT cast(1 as decimal(5, 0)) % cast(1 as float) FROM t +-- !query 410 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) % CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 410 output +0.0 + + +-- !query 411 +SELECT cast(1 as decimal(10, 0)) % cast(1 as float) FROM t +-- !query 411 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) % CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 411 output +0.0 + + +-- !query 412 +SELECT cast(1 as decimal(20, 0)) % cast(1 as float) FROM t +-- !query 412 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) % CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 412 output +0.0 + + +-- !query 413 +SELECT cast(1 as decimal(3, 0)) % cast(1 as double) FROM t +-- !query 413 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) % CAST(1 AS DOUBLE)):double> +-- !query 413 output +0.0 + + +-- !query 414 +SELECT cast(1 as decimal(5, 0)) % cast(1 as double) FROM t +-- !query 414 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) % CAST(1 AS DOUBLE)):double> +-- !query 414 output +0.0 + + +-- !query 415 +SELECT cast(1 as decimal(10, 0)) % cast(1 as double) FROM t +-- !query 415 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) % CAST(1 AS DOUBLE)):double> +-- !query 415 output +0.0 + + +-- !query 416 +SELECT cast(1 as decimal(20, 0)) % cast(1 as double) FROM t +-- !query 416 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) % CAST(1 AS DOUBLE)):double> +-- !query 416 output +0.0 + + +-- !query 417 +SELECT cast(1 as decimal(3, 0)) % cast(1 as decimal(10, 0)) FROM t +-- !query 417 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) % CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(3,0)> +-- !query 417 output +0 + + +-- !query 418 +SELECT cast(1 as decimal(5, 0)) % cast(1 as decimal(10, 0)) FROM t +-- !query 418 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) % CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(5,0)> +-- !query 418 output +0 + + +-- !query 419 +SELECT cast(1 as decimal(10, 0)) % cast(1 as decimal(10, 0)) FROM t +-- !query 419 schema +struct<(CAST(1 AS DECIMAL(10,0)) % CAST(1 AS DECIMAL(10,0))):decimal(10,0)> +-- !query 419 output +0 + + +-- !query 420 +SELECT cast(1 as decimal(20, 0)) % cast(1 as decimal(10, 0)) FROM t +-- !query 420 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) % CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):decimal(10,0)> +-- !query 420 output +0 + + +-- !query 421 +SELECT cast(1 as decimal(3, 0)) % cast(1 as string) FROM t +-- !query 421 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) % CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 421 output +0.0 + + +-- !query 422 +SELECT cast(1 as decimal(5, 0)) % cast(1 as string) FROM t +-- !query 422 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) % CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 422 output +0.0 + + +-- !query 423 +SELECT cast(1 as decimal(10, 0)) % cast(1 as string) FROM t +-- !query 423 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) % CAST(CAST(1 AS STRING) AS 
DOUBLE)):double> +-- !query 423 output +0.0 + + +-- !query 424 +SELECT cast(1 as decimal(20, 0)) % cast(1 as string) FROM t +-- !query 424 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) % CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 424 output +0.0 + + +-- !query 425 +SELECT cast(1 as decimal(3, 0)) % cast('1' as binary) FROM t +-- !query 425 schema +struct<> +-- !query 425 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) % CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) % CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 + + +-- !query 426 +SELECT cast(1 as decimal(5, 0)) % cast('1' as binary) FROM t +-- !query 426 schema +struct<> +-- !query 426 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) % CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) % CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 + + +-- !query 427 +SELECT cast(1 as decimal(10, 0)) % cast('1' as binary) FROM t +-- !query 427 schema +struct<> +-- !query 427 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) % CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) % CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 + + +-- !query 428 +SELECT cast(1 as decimal(20, 0)) % cast('1' as binary) FROM t +-- !query 428 schema +struct<> +-- !query 428 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) % CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) % CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 + + +-- !query 429 +SELECT cast(1 as decimal(3, 0)) % cast(1 as boolean) FROM t +-- !query 429 schema +struct<> +-- !query 429 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) % CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) % CAST(1 AS BOOLEAN))' (decimal(3,0) and boolean).; line 1 pos 7 + + +-- !query 430 +SELECT cast(1 as decimal(5, 0)) % cast(1 as boolean) FROM t +-- !query 430 schema +struct<> +-- !query 430 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) % CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) % CAST(1 AS BOOLEAN))' (decimal(5,0) and boolean).; line 1 pos 7 + + +-- !query 431 +SELECT cast(1 as decimal(10, 0)) % cast(1 as boolean) FROM t +-- !query 431 schema +struct<> +-- !query 431 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) % CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) % CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 + + +-- !query 432 +SELECT cast(1 as decimal(20, 0)) % cast(1 as boolean) FROM t +-- !query 432 schema +struct<> +-- !query 432 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) % CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) % CAST(1 AS BOOLEAN))' (decimal(20,0) and boolean).; line 1 pos 7 + + +-- !query 433 +SELECT cast(1 as decimal(3, 0)) % cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 433 schema +struct<> +-- !query 433 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) % 
CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) % CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 + + +-- !query 434 +SELECT cast(1 as decimal(5, 0)) % cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 434 schema +struct<> +-- !query 434 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) % CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) % CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 + + +-- !query 435 +SELECT cast(1 as decimal(10, 0)) % cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 435 schema +struct<> +-- !query 435 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) % CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) % CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 + + +-- !query 436 +SELECT cast(1 as decimal(20, 0)) % cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 436 schema +struct<> +-- !query 436 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) % CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) % CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 + + +-- !query 437 +SELECT cast(1 as decimal(3, 0)) % cast('2017-12-11 09:30:00' as date) FROM t +-- !query 437 schema +struct<> +-- !query 437 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) % CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) % CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 + + +-- !query 438 +SELECT cast(1 as decimal(5, 0)) % cast('2017-12-11 09:30:00' as date) FROM t +-- !query 438 schema +struct<> +-- !query 438 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) % CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) % CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 + + +-- !query 439 +SELECT cast(1 as decimal(10, 0)) % cast('2017-12-11 09:30:00' as date) FROM t +-- !query 439 schema +struct<> +-- !query 439 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) % CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) % CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 + + +-- !query 440 +SELECT cast(1 as decimal(20, 0)) % cast('2017-12-11 09:30:00' as date) FROM t +-- !query 440 schema +struct<> +-- !query 440 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) % CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) % CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 + + +-- !query 441 +SELECT pmod(cast(1 as tinyint), cast(1 as decimal(3, 0))) FROM t +-- !query 441 schema +struct +-- !query 441 output +0 + + +-- !query 442 +SELECT pmod(cast(1 as tinyint), cast(1 as decimal(5, 0))) FROM t +-- !query 442 schema +struct +-- !query 442 output +0 + + 
+-- !query 443 +SELECT pmod(cast(1 as tinyint), cast(1 as decimal(10, 0))) FROM t +-- !query 443 schema +struct +-- !query 443 output +0 + + +-- !query 444 +SELECT pmod(cast(1 as tinyint), cast(1 as decimal(20, 0))) FROM t +-- !query 444 schema +struct +-- !query 444 output +0 + + +-- !query 445 +SELECT pmod(cast(1 as smallint), cast(1 as decimal(3, 0))) FROM t +-- !query 445 schema +struct +-- !query 445 output +0 + + +-- !query 446 +SELECT pmod(cast(1 as smallint), cast(1 as decimal(5, 0))) FROM t +-- !query 446 schema +struct +-- !query 446 output +0 + + +-- !query 447 +SELECT pmod(cast(1 as smallint), cast(1 as decimal(10, 0))) FROM t +-- !query 447 schema +struct +-- !query 447 output +0 + + +-- !query 448 +SELECT pmod(cast(1 as smallint), cast(1 as decimal(20, 0))) FROM t +-- !query 448 schema +struct +-- !query 448 output +0 + + +-- !query 449 +SELECT pmod(cast(1 as int), cast(1 as decimal(3, 0))) FROM t +-- !query 449 schema +struct +-- !query 449 output +0 + + +-- !query 450 +SELECT pmod(cast(1 as int), cast(1 as decimal(5, 0))) FROM t +-- !query 450 schema +struct +-- !query 450 output +0 + + +-- !query 451 +SELECT pmod(cast(1 as int), cast(1 as decimal(10, 0))) FROM t +-- !query 451 schema +struct +-- !query 451 output +0 + + +-- !query 452 +SELECT pmod(cast(1 as int), cast(1 as decimal(20, 0))) FROM t +-- !query 452 schema +struct +-- !query 452 output +0 + + +-- !query 453 +SELECT pmod(cast(1 as bigint), cast(1 as decimal(3, 0))) FROM t +-- !query 453 schema +struct +-- !query 453 output +0 + + +-- !query 454 +SELECT pmod(cast(1 as bigint), cast(1 as decimal(5, 0))) FROM t +-- !query 454 schema +struct +-- !query 454 output +0 + + +-- !query 455 +SELECT pmod(cast(1 as bigint), cast(1 as decimal(10, 0))) FROM t +-- !query 455 schema +struct +-- !query 455 output +0 + + +-- !query 456 +SELECT pmod(cast(1 as bigint), cast(1 as decimal(20, 0))) FROM t +-- !query 456 schema +struct +-- !query 456 output +0 + + +-- !query 457 +SELECT pmod(cast(1 as float), cast(1 as decimal(3, 0))) FROM t +-- !query 457 schema +struct +-- !query 457 output +0.0 + + +-- !query 458 +SELECT pmod(cast(1 as float), cast(1 as decimal(5, 0))) FROM t +-- !query 458 schema +struct +-- !query 458 output +0.0 + + +-- !query 459 +SELECT pmod(cast(1 as float), cast(1 as decimal(10, 0))) FROM t +-- !query 459 schema +struct +-- !query 459 output +0.0 + + +-- !query 460 +SELECT pmod(cast(1 as float), cast(1 as decimal(20, 0))) FROM t +-- !query 460 schema +struct +-- !query 460 output +0.0 + + +-- !query 461 +SELECT pmod(cast(1 as double), cast(1 as decimal(3, 0))) FROM t +-- !query 461 schema +struct +-- !query 461 output +0.0 + + +-- !query 462 +SELECT pmod(cast(1 as double), cast(1 as decimal(5, 0))) FROM t +-- !query 462 schema +struct +-- !query 462 output +0.0 + + +-- !query 463 +SELECT pmod(cast(1 as double), cast(1 as decimal(10, 0))) FROM t +-- !query 463 schema +struct +-- !query 463 output +0.0 + + +-- !query 464 +SELECT pmod(cast(1 as double), cast(1 as decimal(20, 0))) FROM t +-- !query 464 schema +struct +-- !query 464 output +0.0 + + +-- !query 465 +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as decimal(3, 0))) FROM t +-- !query 465 schema +struct +-- !query 465 output +0 + + +-- !query 466 +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as decimal(5, 0))) FROM t +-- !query 466 schema +struct +-- !query 466 output +0 + + +-- !query 467 +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as decimal(10, 0))) FROM t +-- !query 467 schema +struct +-- !query 467 output +0 + + +-- !query 468 +SELECT 
pmod(cast(1 as decimal(10, 0)), cast(1 as decimal(20, 0))) FROM t +-- !query 468 schema +struct +-- !query 468 output +0 + + +-- !query 469 +SELECT pmod(cast('1' as binary), cast(1 as decimal(3, 0))) FROM t +-- !query 469 schema +struct<> +-- !query 469 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST('1' AS BINARY), CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in 'pmod(CAST('1' AS BINARY), CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 + + +-- !query 470 +SELECT pmod(cast('1' as binary), cast(1 as decimal(5, 0))) FROM t +-- !query 470 schema +struct<> +-- !query 470 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST('1' AS BINARY), CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in 'pmod(CAST('1' AS BINARY), CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 + + +-- !query 471 +SELECT pmod(cast('1' as binary), cast(1 as decimal(10, 0))) FROM t +-- !query 471 schema +struct<> +-- !query 471 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST('1' AS BINARY), CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in 'pmod(CAST('1' AS BINARY), CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 + + +-- !query 472 +SELECT pmod(cast('1' as binary), cast(1 as decimal(20, 0))) FROM t +-- !query 472 schema +struct<> +-- !query 472 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST('1' AS BINARY), CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in 'pmod(CAST('1' AS BINARY), CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 + + +-- !query 473 +SELECT pmod(cast('2017-12-11 09:30:00.0' as timestamp), cast(1 as decimal(3, 0))) FROM t +-- !query 473 schema +struct<> +-- !query 473 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in 'pmod(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 + + +-- !query 474 +SELECT pmod(cast('2017-12-11 09:30:00.0' as timestamp), cast(1 as decimal(5, 0))) FROM t +-- !query 474 schema +struct<> +-- !query 474 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in 'pmod(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 + + +-- !query 475 +SELECT pmod(cast('2017-12-11 09:30:00.0' as timestamp), cast(1 as decimal(10, 0))) FROM t +-- !query 475 schema +struct<> +-- !query 475 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in 'pmod(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 + + +-- !query 476 +SELECT pmod(cast('2017-12-11 09:30:00.0' as timestamp), cast(1 as decimal(20, 0))) FROM t +-- !query 476 schema +struct<> +-- !query 476 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in 'pmod(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 + + +-- !query 
477 +SELECT pmod(cast('2017-12-11 09:30:00' as date), cast(1 as decimal(3, 0))) FROM t +-- !query 477 schema +struct<> +-- !query 477 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in 'pmod(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 + + +-- !query 478 +SELECT pmod(cast('2017-12-11 09:30:00' as date), cast(1 as decimal(5, 0))) FROM t +-- !query 478 schema +struct<> +-- !query 478 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in 'pmod(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 + + +-- !query 479 +SELECT pmod(cast('2017-12-11 09:30:00' as date), cast(1 as decimal(10, 0))) FROM t +-- !query 479 schema +struct<> +-- !query 479 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in 'pmod(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 + + +-- !query 480 +SELECT pmod(cast('2017-12-11 09:30:00' as date), cast(1 as decimal(20, 0))) FROM t +-- !query 480 schema +struct<> +-- !query 480 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in 'pmod(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 + + +-- !query 481 +SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as tinyint)) FROM t +-- !query 481 schema +struct +-- !query 481 output +0 + + +-- !query 482 +SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as tinyint)) FROM t +-- !query 482 schema +struct +-- !query 482 output +0 + + +-- !query 483 +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as tinyint)) FROM t +-- !query 483 schema +struct +-- !query 483 output +0 + + +-- !query 484 +SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as tinyint)) FROM t +-- !query 484 schema +struct +-- !query 484 output +0 + + +-- !query 485 +SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as smallint)) FROM t +-- !query 485 schema +struct +-- !query 485 output +0 + + +-- !query 486 +SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as smallint)) FROM t +-- !query 486 schema +struct +-- !query 486 output +0 + + +-- !query 487 +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as smallint)) FROM t +-- !query 487 schema +struct +-- !query 487 output +0 + + +-- !query 488 +SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as smallint)) FROM t +-- !query 488 schema +struct +-- !query 488 output +0 + + +-- !query 489 +SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as int)) FROM t +-- !query 489 schema +struct +-- !query 489 output +0 + + +-- !query 490 +SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as int)) FROM t +-- !query 490 schema +struct +-- !query 490 output +0 + + +-- !query 491 +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as int)) FROM t +-- !query 491 schema +struct +-- !query 491 output +0 + + +-- !query 492 +SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as int)) FROM t +-- !query 492 schema +struct +-- !query 492 output +0 + + +-- !query 493 +SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as bigint)) FROM t +-- !query 493 schema +struct +-- !query 493 output +0 + 
+ +-- !query 494 +SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as bigint)) FROM t +-- !query 494 schema +struct +-- !query 494 output +0 + + +-- !query 495 +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as bigint)) FROM t +-- !query 495 schema +struct +-- !query 495 output +0 + + +-- !query 496 +SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as bigint)) FROM t +-- !query 496 schema +struct +-- !query 496 output +0 + + +-- !query 497 +SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as float)) FROM t +-- !query 497 schema +struct +-- !query 497 output +0.0 + + +-- !query 498 +SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as float)) FROM t +-- !query 498 schema +struct +-- !query 498 output +0.0 + + +-- !query 499 +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as float)) FROM t +-- !query 499 schema +struct +-- !query 499 output +0.0 + + +-- !query 500 +SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as float)) FROM t +-- !query 500 schema +struct +-- !query 500 output +0.0 + + +-- !query 501 +SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as double)) FROM t +-- !query 501 schema +struct +-- !query 501 output +0.0 + + +-- !query 502 +SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as double)) FROM t +-- !query 502 schema +struct +-- !query 502 output +0.0 + + +-- !query 503 +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as double)) FROM t +-- !query 503 schema +struct +-- !query 503 output +0.0 + + +-- !query 504 +SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as double)) FROM t +-- !query 504 schema +struct +-- !query 504 output +0.0 + + +-- !query 505 +SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as decimal(10, 0))) FROM t +-- !query 505 schema +struct +-- !query 505 output +0 + + +-- !query 506 +SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as decimal(10, 0))) FROM t +-- !query 506 schema +struct +-- !query 506 output +0 + + +-- !query 507 +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as decimal(10, 0))) FROM t +-- !query 507 schema +struct +-- !query 507 output +0 + + +-- !query 508 +SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as decimal(10, 0))) FROM t +-- !query 508 schema +struct +-- !query 508 output +0 + + +-- !query 509 +SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as string)) FROM t +-- !query 509 schema +struct +-- !query 509 output +0.0 + + +-- !query 510 +SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as string)) FROM t +-- !query 510 schema +struct +-- !query 510 output +0.0 + + +-- !query 511 +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as string)) FROM t +-- !query 511 schema +struct +-- !query 511 output +0.0 + + +-- !query 512 +SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as string)) FROM t +-- !query 512 schema +struct +-- !query 512 output +0.0 + + +-- !query 513 +SELECT pmod(cast(1 as decimal(3, 0)) , cast('1' as binary)) FROM t +-- !query 513 schema +struct<> +-- !query 513 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST(1 AS DECIMAL(3,0)), CAST('1' AS BINARY))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(3,0)), CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 + + +-- !query 514 +SELECT pmod(cast(1 as decimal(5, 0)) , cast('1' as binary)) FROM t +-- !query 514 schema +struct<> +-- !query 514 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST(1 AS DECIMAL(5,0)), CAST('1' AS BINARY))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(5,0)), CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 + + +-- !query 515 +SELECT pmod(cast(1 as decimal(10, 0)), 
cast('1' as binary)) FROM t +-- !query 515 schema +struct<> +-- !query 515 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST(1 AS DECIMAL(10,0)), CAST('1' AS BINARY))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(10,0)), CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 + + +-- !query 516 +SELECT pmod(cast(1 as decimal(20, 0)), cast('1' as binary)) FROM t +-- !query 516 schema +struct<> +-- !query 516 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST(1 AS DECIMAL(20,0)), CAST('1' AS BINARY))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(20,0)), CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 + + +-- !query 517 +SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as boolean)) FROM t +-- !query 517 schema +struct<> +-- !query 517 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST(1 AS DECIMAL(3,0)), CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(3,0)), CAST(1 AS BOOLEAN))' (decimal(3,0) and boolean).; line 1 pos 7 + + +-- !query 518 +SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as boolean)) FROM t +-- !query 518 schema +struct<> +-- !query 518 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST(1 AS DECIMAL(5,0)), CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(5,0)), CAST(1 AS BOOLEAN))' (decimal(5,0) and boolean).; line 1 pos 7 + + +-- !query 519 +SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as boolean)) FROM t +-- !query 519 schema +struct<> +-- !query 519 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST(1 AS DECIMAL(10,0)), CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(10,0)), CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 + + +-- !query 520 +SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as boolean)) FROM t +-- !query 520 schema +struct<> +-- !query 520 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST(1 AS DECIMAL(20,0)), CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(20,0)), CAST(1 AS BOOLEAN))' (decimal(20,0) and boolean).; line 1 pos 7 + + +-- !query 521 +SELECT pmod(cast(1 as decimal(3, 0)) , cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 521 schema +struct<> +-- !query 521 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST(1 AS DECIMAL(3,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(3,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 + + +-- !query 522 +SELECT pmod(cast(1 as decimal(5, 0)) , cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 522 schema +struct<> +-- !query 522 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST(1 AS DECIMAL(5,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(5,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 + + +-- !query 523 +SELECT pmod(cast(1 as decimal(10, 0)), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 523 schema +struct<> +-- !query 523 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in 
'pmod(CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 + + +-- !query 524 +SELECT pmod(cast(1 as decimal(20, 0)), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 524 schema +struct<> +-- !query 524 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST(1 AS DECIMAL(20,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(20,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 + + +-- !query 525 +SELECT pmod(cast(1 as decimal(3, 0)) , cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 525 schema +struct<> +-- !query 525 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST(1 AS DECIMAL(3,0)), CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(3,0)), CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 + + +-- !query 526 +SELECT pmod(cast(1 as decimal(5, 0)) , cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 526 schema +struct<> +-- !query 526 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST(1 AS DECIMAL(5,0)), CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(5,0)), CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 + + +-- !query 527 +SELECT pmod(cast(1 as decimal(10, 0)), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 527 schema +struct<> +-- !query 527 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 + + +-- !query 528 +SELECT pmod(cast(1 as decimal(20, 0)), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 528 schema +struct<> +-- !query 528 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST(1 AS DECIMAL(20,0)), CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(20,0)), CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 + + +-- !query 529 +SELECT cast(1 as tinyint) = cast(1 as decimal(3, 0)) FROM t +-- !query 529 schema +struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) = CAST(1 AS DECIMAL(3,0))):boolean> +-- !query 529 output +true + + +-- !query 530 +SELECT cast(1 as tinyint) = cast(1 as decimal(5, 0)) FROM t +-- !query 530 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)) = CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> +-- !query 530 output +true + + +-- !query 531 +SELECT cast(1 as tinyint) = cast(1 as decimal(10, 0)) FROM t +-- !query 531 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 531 output +true + + +-- !query 532 +SELECT cast(1 as tinyint) = cast(1 as decimal(20, 0)) FROM t +-- !query 532 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 532 output +true + + +-- !query 533 +SELECT cast(1 as smallint) = cast(1 as decimal(3, 0)) FROM t +-- !query 533 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)) = CAST(CAST(1 AS DECIMAL(3,0)) AS 
DECIMAL(5,0))):boolean> +-- !query 533 output +true + + +-- !query 534 +SELECT cast(1 as smallint) = cast(1 as decimal(5, 0)) FROM t +-- !query 534 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) = CAST(1 AS DECIMAL(5,0))):boolean> +-- !query 534 output +true + + +-- !query 535 +SELECT cast(1 as smallint) = cast(1 as decimal(10, 0)) FROM t +-- !query 535 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 535 output +true + + +-- !query 536 +SELECT cast(1 as smallint) = cast(1 as decimal(20, 0)) FROM t +-- !query 536 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 536 output +true + + +-- !query 537 +SELECT cast(1 as int) = cast(1 as decimal(3, 0)) FROM t +-- !query 537 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> +-- !query 537 output +true + + +-- !query 538 +SELECT cast(1 as int) = cast(1 as decimal(5, 0)) FROM t +-- !query 538 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> +-- !query 538 output +true + + +-- !query 539 +SELECT cast(1 as int) = cast(1 as decimal(10, 0)) FROM t +-- !query 539 schema +struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) = CAST(1 AS DECIMAL(10,0))):boolean> +-- !query 539 output +true + + +-- !query 540 +SELECT cast(1 as int) = cast(1 as decimal(20, 0)) FROM t +-- !query 540 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 540 output +true + + +-- !query 541 +SELECT cast(1 as bigint) = cast(1 as decimal(3, 0)) FROM t +-- !query 541 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> +-- !query 541 output +true + + +-- !query 542 +SELECT cast(1 as bigint) = cast(1 as decimal(5, 0)) FROM t +-- !query 542 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> +-- !query 542 output +true + + +-- !query 543 +SELECT cast(1 as bigint) = cast(1 as decimal(10, 0)) FROM t +-- !query 543 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> +-- !query 543 output +true + + +-- !query 544 +SELECT cast(1 as bigint) = cast(1 as decimal(20, 0)) FROM t +-- !query 544 schema +struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) = CAST(1 AS DECIMAL(20,0))):boolean> +-- !query 544 output +true + + +-- !query 545 +SELECT cast(1 as float) = cast(1 as decimal(3, 0)) FROM t +-- !query 545 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) = CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> +-- !query 545 output +true + + +-- !query 546 +SELECT cast(1 as float) = cast(1 as decimal(5, 0)) FROM t +-- !query 546 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) = CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> +-- !query 546 output +true + + +-- !query 547 +SELECT cast(1 as float) = cast(1 as decimal(10, 0)) FROM t +-- !query 547 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) = CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 547 output +true + + +-- !query 548 +SELECT cast(1 as float) = cast(1 as decimal(20, 0)) FROM t +-- !query 548 schema 
+struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) = CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> +-- !query 548 output +true + + +-- !query 549 +SELECT cast(1 as double) = cast(1 as decimal(3, 0)) FROM t +-- !query 549 schema +struct<(CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> +-- !query 549 output +true + + +-- !query 550 +SELECT cast(1 as double) = cast(1 as decimal(5, 0)) FROM t +-- !query 550 schema +struct<(CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> +-- !query 550 output +true + + +-- !query 551 +SELECT cast(1 as double) = cast(1 as decimal(10, 0)) FROM t +-- !query 551 schema +struct<(CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 551 output +true + + +-- !query 552 +SELECT cast(1 as double) = cast(1 as decimal(20, 0)) FROM t +-- !query 552 schema +struct<(CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> +-- !query 552 output +true + + +-- !query 553 +SELECT cast(1 as decimal(10, 0)) = cast(1 as decimal(3, 0)) FROM t +-- !query 553 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> +-- !query 553 output +true + + +-- !query 554 +SELECT cast(1 as decimal(10, 0)) = cast(1 as decimal(5, 0)) FROM t +-- !query 554 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> +-- !query 554 output +true + + +-- !query 555 +SELECT cast(1 as decimal(10, 0)) = cast(1 as decimal(10, 0)) FROM t +-- !query 555 schema +struct<(CAST(1 AS DECIMAL(10,0)) = CAST(1 AS DECIMAL(10,0))):boolean> +-- !query 555 output +true + + +-- !query 556 +SELECT cast(1 as decimal(10, 0)) = cast(1 as decimal(20, 0)) FROM t +-- !query 556 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 556 output +true + + +-- !query 557 +SELECT cast('1' as binary) = cast(1 as decimal(3, 0)) FROM t +-- !query 557 schema +struct<> +-- !query 557 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 + + +-- !query 558 +SELECT cast('1' as binary) = cast(1 as decimal(5, 0)) FROM t +-- !query 558 schema +struct<> +-- !query 558 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 + + +-- !query 559 +SELECT cast('1' as binary) = cast(1 as decimal(10, 0)) FROM t +-- !query 559 schema +struct<> +-- !query 559 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 + + +-- !query 560 +SELECT cast('1' as binary) = cast(1 as decimal(20, 0)) FROM t +-- !query 560 schema +struct<> +-- !query 560 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 + + +-- !query 561 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) = cast(1 as decimal(3, 0)) FROM t 
+-- !query 561 schema +struct<> +-- !query 561 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 + + +-- !query 562 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) = cast(1 as decimal(5, 0)) FROM t +-- !query 562 schema +struct<> +-- !query 562 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 + + +-- !query 563 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) = cast(1 as decimal(10, 0)) FROM t +-- !query 563 schema +struct<> +-- !query 563 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 + + +-- !query 564 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) = cast(1 as decimal(20, 0)) FROM t +-- !query 564 schema +struct<> +-- !query 564 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 + + +-- !query 565 +SELECT cast('2017-12-11 09:30:00' as date) = cast(1 as decimal(3, 0)) FROM t +-- !query 565 schema +struct<> +-- !query 565 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 + + +-- !query 566 +SELECT cast('2017-12-11 09:30:00' as date) = cast(1 as decimal(5, 0)) FROM t +-- !query 566 schema +struct<> +-- !query 566 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 + + +-- !query 567 +SELECT cast('2017-12-11 09:30:00' as date) = cast(1 as decimal(10, 0)) FROM t +-- !query 567 schema +struct<> +-- !query 567 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 + + +-- !query 568 +SELECT cast('2017-12-11 09:30:00' as date) = cast(1 as decimal(20, 0)) FROM t +-- !query 568 schema +struct<> +-- !query 568 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 + + +-- !query 569 +SELECT cast(1 as decimal(3, 0)) = cast(1 as tinyint) FROM t +-- !query 569 schema +struct<(CAST(1 AS DECIMAL(3,0)) = 
CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):boolean> +-- !query 569 output +true + + +-- !query 570 +SELECT cast(1 as decimal(5, 0)) = cast(1 as tinyint) FROM t +-- !query 570 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)) = CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> +-- !query 570 output +true + + +-- !query 571 +SELECT cast(1 as decimal(10, 0)) = cast(1 as tinyint) FROM t +-- !query 571 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> +-- !query 571 output +true + + +-- !query 572 +SELECT cast(1 as decimal(20, 0)) = cast(1 as tinyint) FROM t +-- !query 572 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> +-- !query 572 output +true + + +-- !query 573 +SELECT cast(1 as decimal(3, 0)) = cast(1 as smallint) FROM t +-- !query 573 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)) = CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> +-- !query 573 output +true + + +-- !query 574 +SELECT cast(1 as decimal(5, 0)) = cast(1 as smallint) FROM t +-- !query 574 schema +struct<(CAST(1 AS DECIMAL(5,0)) = CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):boolean> +-- !query 574 output +true + + +-- !query 575 +SELECT cast(1 as decimal(10, 0)) = cast(1 as smallint) FROM t +-- !query 575 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> +-- !query 575 output +true + + +-- !query 576 +SELECT cast(1 as decimal(20, 0)) = cast(1 as smallint) FROM t +-- !query 576 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> +-- !query 576 output +true + + +-- !query 577 +SELECT cast(1 as decimal(3, 0)) = cast(1 as int) FROM t +-- !query 577 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) = CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 577 output +true + + +-- !query 578 +SELECT cast(1 as decimal(5, 0)) = cast(1 as int) FROM t +-- !query 578 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) = CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 578 output +true + + +-- !query 579 +SELECT cast(1 as decimal(10, 0)) = cast(1 as int) FROM t +-- !query 579 schema +struct<(CAST(1 AS DECIMAL(10,0)) = CAST(CAST(1 AS INT) AS DECIMAL(10,0))):boolean> +-- !query 579 output +true + + +-- !query 580 +SELECT cast(1 as decimal(20, 0)) = cast(1 as int) FROM t +-- !query 580 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> +-- !query 580 output +true + + +-- !query 581 +SELECT cast(1 as decimal(3, 0)) = cast(1 as bigint) FROM t +-- !query 581 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 581 output +true + + +-- !query 582 +SELECT cast(1 as decimal(5, 0)) = cast(1 as bigint) FROM t +-- !query 582 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 582 output +true + + +-- !query 583 +SELECT cast(1 as decimal(10, 0)) = cast(1 as bigint) FROM t +-- !query 583 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) = 
CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 583 output +true + + +-- !query 584 +SELECT cast(1 as decimal(20, 0)) = cast(1 as bigint) FROM t +-- !query 584 schema +struct<(CAST(1 AS DECIMAL(20,0)) = CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):boolean> +-- !query 584 output +true + + +-- !query 585 +SELECT cast(1 as decimal(3, 0)) = cast(1 as float) FROM t +-- !query 585 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) = CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 585 output +true + + +-- !query 586 +SELECT cast(1 as decimal(5, 0)) = cast(1 as float) FROM t +-- !query 586 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) = CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 586 output +true + + +-- !query 587 +SELECT cast(1 as decimal(10, 0)) = cast(1 as float) FROM t +-- !query 587 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 587 output +true + + +-- !query 588 +SELECT cast(1 as decimal(20, 0)) = cast(1 as float) FROM t +-- !query 588 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) = CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 588 output +true + + +-- !query 589 +SELECT cast(1 as decimal(3, 0)) = cast(1 as double) FROM t +-- !query 589 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> +-- !query 589 output +true + + +-- !query 590 +SELECT cast(1 as decimal(5, 0)) = cast(1 as double) FROM t +-- !query 590 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> +-- !query 590 output +true + + +-- !query 591 +SELECT cast(1 as decimal(10, 0)) = cast(1 as double) FROM t +-- !query 591 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> +-- !query 591 output +true + + +-- !query 592 +SELECT cast(1 as decimal(20, 0)) = cast(1 as double) FROM t +-- !query 592 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> +-- !query 592 output +true + + +-- !query 593 +SELECT cast(1 as decimal(3, 0)) = cast(1 as decimal(10, 0)) FROM t +-- !query 593 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 593 output +true + + +-- !query 594 +SELECT cast(1 as decimal(5, 0)) = cast(1 as decimal(10, 0)) FROM t +-- !query 594 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 594 output +true + + +-- !query 595 +SELECT cast(1 as decimal(10, 0)) = cast(1 as decimal(10, 0)) FROM t +-- !query 595 schema +struct<(CAST(1 AS DECIMAL(10,0)) = CAST(1 AS DECIMAL(10,0))):boolean> +-- !query 595 output +true + + +-- !query 596 +SELECT cast(1 as decimal(20, 0)) = cast(1 as decimal(10, 0)) FROM t +-- !query 596 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> +-- !query 596 output +true + + +-- !query 597 +SELECT cast(1 as decimal(3, 0)) = cast(1 as string) FROM t +-- !query 597 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) = CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 597 output +true + + +-- !query 598 +SELECT cast(1 as decimal(5, 0)) = cast(1 as string) FROM t +-- !query 598 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) = CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 598 output +true + + +-- !query 599 +SELECT cast(1 as decimal(10, 0)) = cast(1 as string) FROM t +-- 
!query 599 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 599 output +true + + +-- !query 600 +SELECT cast(1 as decimal(20, 0)) = cast(1 as string) FROM t +-- !query 600 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) = CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 600 output +true + + +-- !query 601 +SELECT cast(1 as decimal(3, 0)) = cast('1' as binary) FROM t +-- !query 601 schema +struct<> +-- !query 601 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) = CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) = CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 + + +-- !query 602 +SELECT cast(1 as decimal(5, 0)) = cast('1' as binary) FROM t +-- !query 602 schema +struct<> +-- !query 602 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) = CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) = CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 + + +-- !query 603 +SELECT cast(1 as decimal(10, 0)) = cast('1' as binary) FROM t +-- !query 603 schema +struct<> +-- !query 603 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) = CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) = CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 + + +-- !query 604 +SELECT cast(1 as decimal(20, 0)) = cast('1' as binary) FROM t +-- !query 604 schema +struct<> +-- !query 604 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) = CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) = CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 + + +-- !query 605 +SELECT cast(1 as decimal(3, 0)) = cast(1 as boolean) FROM t +-- !query 605 schema +struct<(CAST(1 AS DECIMAL(3,0)) = CAST(CAST(1 AS BOOLEAN) AS DECIMAL(3,0))):boolean> +-- !query 605 output +true + + +-- !query 606 +SELECT cast(1 as decimal(5, 0)) = cast(1 as boolean) FROM t +-- !query 606 schema +struct<(CAST(1 AS DECIMAL(5,0)) = CAST(CAST(1 AS BOOLEAN) AS DECIMAL(5,0))):boolean> +-- !query 606 output +true + + +-- !query 607 +SELECT cast(1 as decimal(10, 0)) = cast(1 as boolean) FROM t +-- !query 607 schema +struct<(CAST(1 AS DECIMAL(10,0)) = CAST(CAST(1 AS BOOLEAN) AS DECIMAL(10,0))):boolean> +-- !query 607 output +true + + +-- !query 608 +SELECT cast(1 as decimal(20, 0)) = cast(1 as boolean) FROM t +-- !query 608 schema +struct<(CAST(1 AS DECIMAL(20,0)) = CAST(CAST(1 AS BOOLEAN) AS DECIMAL(20,0))):boolean> +-- !query 608 output +true + + +-- !query 609 +SELECT cast(1 as decimal(3, 0)) = cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 609 schema +struct<> +-- !query 609 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 + + +-- !query 610 +SELECT cast(1 as decimal(5, 0)) = cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 610 schema +struct<> +-- !query 610 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS 
DECIMAL(5,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 + + +-- !query 611 +SELECT cast(1 as decimal(10, 0)) = cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 611 schema +struct<> +-- !query 611 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 + + +-- !query 612 +SELECT cast(1 as decimal(20, 0)) = cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 612 schema +struct<> +-- !query 612 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 + + +-- !query 613 +SELECT cast(1 as decimal(3, 0)) = cast('2017-12-11 09:30:00' as date) FROM t +-- !query 613 schema +struct<> +-- !query 613 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) = CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) = CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 + + +-- !query 614 +SELECT cast(1 as decimal(5, 0)) = cast('2017-12-11 09:30:00' as date) FROM t +-- !query 614 schema +struct<> +-- !query 614 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) = CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) = CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 + + +-- !query 615 +SELECT cast(1 as decimal(10, 0)) = cast('2017-12-11 09:30:00' as date) FROM t +-- !query 615 schema +struct<> +-- !query 615 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) = CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) = CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 + + +-- !query 616 +SELECT cast(1 as decimal(20, 0)) = cast('2017-12-11 09:30:00' as date) FROM t +-- !query 616 schema +struct<> +-- !query 616 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) = CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) = CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 + + +-- !query 617 +SELECT cast(1 as tinyint) <=> cast(1 as decimal(3, 0)) FROM t +-- !query 617 schema +struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) <=> CAST(1 AS DECIMAL(3,0))):boolean> +-- !query 617 output +true + + +-- !query 618 +SELECT cast(1 as tinyint) <=> cast(1 as decimal(5, 0)) FROM t +-- !query 618 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)) <=> CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> +-- !query 618 output +true + + +-- !query 619 +SELECT cast(1 as tinyint) <=> cast(1 as decimal(10, 0)) FROM t +-- !query 619 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) <=> CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 619 output +true + + +-- !query 620 +SELECT cast(1 as tinyint) <=> cast(1 as decimal(20, 0)) FROM t 
+-- !query 620 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)) <=> CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 620 output +true + + +-- !query 621 +SELECT cast(1 as smallint) <=> cast(1 as decimal(3, 0)) FROM t +-- !query 621 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)) <=> CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> +-- !query 621 output +true + + +-- !query 622 +SELECT cast(1 as smallint) <=> cast(1 as decimal(5, 0)) FROM t +-- !query 622 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) <=> CAST(1 AS DECIMAL(5,0))):boolean> +-- !query 622 output +true + + +-- !query 623 +SELECT cast(1 as smallint) <=> cast(1 as decimal(10, 0)) FROM t +-- !query 623 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) <=> CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 623 output +true + + +-- !query 624 +SELECT cast(1 as smallint) <=> cast(1 as decimal(20, 0)) FROM t +-- !query 624 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)) <=> CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 624 output +true + + +-- !query 625 +SELECT cast(1 as int) <=> cast(1 as decimal(3, 0)) FROM t +-- !query 625 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) <=> CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> +-- !query 625 output +true + + +-- !query 626 +SELECT cast(1 as int) <=> cast(1 as decimal(5, 0)) FROM t +-- !query 626 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) <=> CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> +-- !query 626 output +true + + +-- !query 627 +SELECT cast(1 as int) <=> cast(1 as decimal(10, 0)) FROM t +-- !query 627 schema +struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) <=> CAST(1 AS DECIMAL(10,0))):boolean> +-- !query 627 output +true + + +-- !query 628 +SELECT cast(1 as int) <=> cast(1 as decimal(20, 0)) FROM t +-- !query 628 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)) <=> CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 628 output +true + + +-- !query 629 +SELECT cast(1 as bigint) <=> cast(1 as decimal(3, 0)) FROM t +-- !query 629 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) <=> CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> +-- !query 629 output +true + + +-- !query 630 +SELECT cast(1 as bigint) <=> cast(1 as decimal(5, 0)) FROM t +-- !query 630 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) <=> CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> +-- !query 630 output +true + + +-- !query 631 +SELECT cast(1 as bigint) <=> cast(1 as decimal(10, 0)) FROM t +-- !query 631 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) <=> CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> +-- !query 631 output +true + + +-- !query 632 +SELECT cast(1 as bigint) <=> cast(1 as decimal(20, 0)) FROM t +-- !query 632 schema +struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) <=> CAST(1 AS DECIMAL(20,0))):boolean> +-- !query 632 output +true + + +-- !query 633 +SELECT cast(1 as float) <=> cast(1 as decimal(3, 0)) FROM t +-- !query 633 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) <=> CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> +-- !query 633 output +true + + +-- !query 634 +SELECT cast(1 as float) <=> cast(1 as decimal(5, 0)) FROM t +-- !query 634 schema 
+struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) <=> CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> +-- !query 634 output +true + + +-- !query 635 +SELECT cast(1 as float) <=> cast(1 as decimal(10, 0)) FROM t +-- !query 635 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) <=> CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 635 output +true + + +-- !query 636 +SELECT cast(1 as float) <=> cast(1 as decimal(20, 0)) FROM t +-- !query 636 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) <=> CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> +-- !query 636 output +true + + +-- !query 637 +SELECT cast(1 as double) <=> cast(1 as decimal(3, 0)) FROM t +-- !query 637 schema +struct<(CAST(1 AS DOUBLE) <=> CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> +-- !query 637 output +true + + +-- !query 638 +SELECT cast(1 as double) <=> cast(1 as decimal(5, 0)) FROM t +-- !query 638 schema +struct<(CAST(1 AS DOUBLE) <=> CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> +-- !query 638 output +true + + +-- !query 639 +SELECT cast(1 as double) <=> cast(1 as decimal(10, 0)) FROM t +-- !query 639 schema +struct<(CAST(1 AS DOUBLE) <=> CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 639 output +true + + +-- !query 640 +SELECT cast(1 as double) <=> cast(1 as decimal(20, 0)) FROM t +-- !query 640 schema +struct<(CAST(1 AS DOUBLE) <=> CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> +-- !query 640 output +true + + +-- !query 641 +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as decimal(3, 0)) FROM t +-- !query 641 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) <=> CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> +-- !query 641 output +true + + +-- !query 642 +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as decimal(5, 0)) FROM t +-- !query 642 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) <=> CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> +-- !query 642 output +true + + +-- !query 643 +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as decimal(10, 0)) FROM t +-- !query 643 schema +struct<(CAST(1 AS DECIMAL(10,0)) <=> CAST(1 AS DECIMAL(10,0))):boolean> +-- !query 643 output +true + + +-- !query 644 +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as decimal(20, 0)) FROM t +-- !query 644 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) <=> CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 644 output +true + + +-- !query 645 +SELECT cast('1' as binary) <=> cast(1 as decimal(3, 0)) FROM t +-- !query 645 schema +struct<> +-- !query 645 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) <=> CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) <=> CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 + + +-- !query 646 +SELECT cast('1' as binary) <=> cast(1 as decimal(5, 0)) FROM t +-- !query 646 schema +struct<> +-- !query 646 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) <=> CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) <=> CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 + + +-- !query 647 +SELECT cast('1' as binary) <=> cast(1 as decimal(10, 0)) FROM t +-- !query 647 schema +struct<> +-- !query 647 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) <=> CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) <=> CAST(1 AS DECIMAL(10,0)))' (binary and 
decimal(10,0)).; line 1 pos 7 + + +-- !query 648 +SELECT cast('1' as binary) <=> cast(1 as decimal(20, 0)) FROM t +-- !query 648 schema +struct<> +-- !query 648 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) <=> CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) <=> CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 + + +-- !query 649 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <=> cast(1 as decimal(3, 0)) FROM t +-- !query 649 schema +struct<> +-- !query 649 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 + + +-- !query 650 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <=> cast(1 as decimal(5, 0)) FROM t +-- !query 650 schema +struct<> +-- !query 650 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 + + +-- !query 651 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <=> cast(1 as decimal(10, 0)) FROM t +-- !query 651 schema +struct<> +-- !query 651 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 + + +-- !query 652 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <=> cast(1 as decimal(20, 0)) FROM t +-- !query 652 schema +struct<> +-- !query 652 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 + + +-- !query 653 +SELECT cast('2017-12-11 09:30:00' as date) <=> cast(1 as decimal(3, 0)) FROM t +-- !query 653 schema +struct<> +-- !query 653 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) <=> CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) <=> CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 + + +-- !query 654 +SELECT cast('2017-12-11 09:30:00' as date) <=> cast(1 as decimal(5, 0)) FROM t +-- !query 654 schema +struct<> +-- !query 654 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) <=> CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) <=> CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 + + +-- !query 655 +SELECT cast('2017-12-11 09:30:00' as date) <=> cast(1 as decimal(10, 0)) FROM t +-- !query 655 schema +struct<> +-- !query 655 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) <=> CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) <=> CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 + + +-- 
!query 656 +SELECT cast('2017-12-11 09:30:00' as date) <=> cast(1 as decimal(20, 0)) FROM t +-- !query 656 schema +struct<> +-- !query 656 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) <=> CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) <=> CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 + + +-- !query 657 +SELECT cast(1 as decimal(3, 0)) <=> cast(1 as tinyint) FROM t +-- !query 657 schema +struct<(CAST(1 AS DECIMAL(3,0)) <=> CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):boolean> +-- !query 657 output +true + + +-- !query 658 +SELECT cast(1 as decimal(5, 0)) <=> cast(1 as tinyint) FROM t +-- !query 658 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)) <=> CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> +-- !query 658 output +true + + +-- !query 659 +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as tinyint) FROM t +-- !query 659 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) <=> CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> +-- !query 659 output +true + + +-- !query 660 +SELECT cast(1 as decimal(20, 0)) <=> cast(1 as tinyint) FROM t +-- !query 660 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) <=> CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> +-- !query 660 output +true + + +-- !query 661 +SELECT cast(1 as decimal(3, 0)) <=> cast(1 as smallint) FROM t +-- !query 661 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)) <=> CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> +-- !query 661 output +true + + +-- !query 662 +SELECT cast(1 as decimal(5, 0)) <=> cast(1 as smallint) FROM t +-- !query 662 schema +struct<(CAST(1 AS DECIMAL(5,0)) <=> CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):boolean> +-- !query 662 output +true + + +-- !query 663 +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as smallint) FROM t +-- !query 663 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) <=> CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> +-- !query 663 output +true + + +-- !query 664 +SELECT cast(1 as decimal(20, 0)) <=> cast(1 as smallint) FROM t +-- !query 664 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) <=> CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> +-- !query 664 output +true + + +-- !query 665 +SELECT cast(1 as decimal(3, 0)) <=> cast(1 as int) FROM t +-- !query 665 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) <=> CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 665 output +true + + +-- !query 666 +SELECT cast(1 as decimal(5, 0)) <=> cast(1 as int) FROM t +-- !query 666 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) <=> CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 666 output +true + + +-- !query 667 +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as int) FROM t +-- !query 667 schema +struct<(CAST(1 AS DECIMAL(10,0)) <=> CAST(CAST(1 AS INT) AS DECIMAL(10,0))):boolean> +-- !query 667 output +true + + +-- !query 668 +SELECT cast(1 as decimal(20, 0)) <=> cast(1 as int) FROM t +-- !query 668 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) <=> CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> +-- !query 668 output +true + + +-- !query 669 +SELECT cast(1 as decimal(3, 0)) <=> cast(1 as bigint) FROM t +-- 
!query 669 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)) <=> CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 669 output +true + + +-- !query 670 +SELECT cast(1 as decimal(5, 0)) <=> cast(1 as bigint) FROM t +-- !query 670 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)) <=> CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 670 output +true + + +-- !query 671 +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as bigint) FROM t +-- !query 671 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) <=> CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 671 output +true + + +-- !query 672 +SELECT cast(1 as decimal(20, 0)) <=> cast(1 as bigint) FROM t +-- !query 672 schema +struct<(CAST(1 AS DECIMAL(20,0)) <=> CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):boolean> +-- !query 672 output +true + + +-- !query 673 +SELECT cast(1 as decimal(3, 0)) <=> cast(1 as float) FROM t +-- !query 673 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) <=> CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 673 output +true + + +-- !query 674 +SELECT cast(1 as decimal(5, 0)) <=> cast(1 as float) FROM t +-- !query 674 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) <=> CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 674 output +true + + +-- !query 675 +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as float) FROM t +-- !query 675 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) <=> CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 675 output +true + + +-- !query 676 +SELECT cast(1 as decimal(20, 0)) <=> cast(1 as float) FROM t +-- !query 676 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) <=> CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 676 output +true + + +-- !query 677 +SELECT cast(1 as decimal(3, 0)) <=> cast(1 as double) FROM t +-- !query 677 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) <=> CAST(1 AS DOUBLE)):boolean> +-- !query 677 output +true + + +-- !query 678 +SELECT cast(1 as decimal(5, 0)) <=> cast(1 as double) FROM t +-- !query 678 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) <=> CAST(1 AS DOUBLE)):boolean> +-- !query 678 output +true + + +-- !query 679 +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as double) FROM t +-- !query 679 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) <=> CAST(1 AS DOUBLE)):boolean> +-- !query 679 output +true + + +-- !query 680 +SELECT cast(1 as decimal(20, 0)) <=> cast(1 as double) FROM t +-- !query 680 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) <=> CAST(1 AS DOUBLE)):boolean> +-- !query 680 output +true + + +-- !query 681 +SELECT cast(1 as decimal(3, 0)) <=> cast(1 as decimal(10, 0)) FROM t +-- !query 681 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) <=> CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 681 output +true + + +-- !query 682 +SELECT cast(1 as decimal(5, 0)) <=> cast(1 as decimal(10, 0)) FROM t +-- !query 682 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) <=> CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 682 output +true + + +-- !query 683 +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as decimal(10, 0)) FROM t +-- !query 683 schema +struct<(CAST(1 AS DECIMAL(10,0)) <=> CAST(1 AS DECIMAL(10,0))):boolean> +-- !query 683 output +true + + +-- !query 684 +SELECT cast(1 as decimal(20, 0)) <=> cast(1 as decimal(10, 0)) FROM t +-- !query 684 schema +struct<(CAST(CAST(1 
AS DECIMAL(20,0)) AS DECIMAL(20,0)) <=> CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> +-- !query 684 output +true + + +-- !query 685 +SELECT cast(1 as decimal(3, 0)) <=> cast(1 as string) FROM t +-- !query 685 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) <=> CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 685 output +true + + +-- !query 686 +SELECT cast(1 as decimal(5, 0)) <=> cast(1 as string) FROM t +-- !query 686 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) <=> CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 686 output +true + + +-- !query 687 +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as string) FROM t +-- !query 687 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) <=> CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 687 output +true + + +-- !query 688 +SELECT cast(1 as decimal(20, 0)) <=> cast(1 as string) FROM t +-- !query 688 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) <=> CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 688 output +true + + +-- !query 689 +SELECT cast(1 as decimal(3, 0)) <=> cast('1' as binary) FROM t +-- !query 689 schema +struct<> +-- !query 689 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) <=> CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) <=> CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 + + +-- !query 690 +SELECT cast(1 as decimal(5, 0)) <=> cast('1' as binary) FROM t +-- !query 690 schema +struct<> +-- !query 690 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) <=> CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) <=> CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 + + +-- !query 691 +SELECT cast(1 as decimal(10, 0)) <=> cast('1' as binary) FROM t +-- !query 691 schema +struct<> +-- !query 691 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) <=> CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) <=> CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 + + +-- !query 692 +SELECT cast(1 as decimal(20, 0)) <=> cast('1' as binary) FROM t +-- !query 692 schema +struct<> +-- !query 692 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) <=> CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) <=> CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 + + +-- !query 693 +SELECT cast(1 as decimal(3, 0)) <=> cast(1 as boolean) FROM t +-- !query 693 schema +struct<(CAST(1 AS DECIMAL(3,0)) <=> CAST(CAST(1 AS BOOLEAN) AS DECIMAL(3,0))):boolean> +-- !query 693 output +true + + +-- !query 694 +SELECT cast(1 as decimal(5, 0)) <=> cast(1 as boolean) FROM t +-- !query 694 schema +struct<(CAST(1 AS DECIMAL(5,0)) <=> CAST(CAST(1 AS BOOLEAN) AS DECIMAL(5,0))):boolean> +-- !query 694 output +true + + +-- !query 695 +SELECT cast(1 as decimal(10, 0)) <=> cast(1 as boolean) FROM t +-- !query 695 schema +struct<(CAST(1 AS DECIMAL(10,0)) <=> CAST(CAST(1 AS BOOLEAN) AS DECIMAL(10,0))):boolean> +-- !query 695 output +true + + +-- !query 696 +SELECT cast(1 as decimal(20, 0)) <=> cast(1 as boolean) FROM t +-- !query 696 schema +struct<(CAST(1 AS DECIMAL(20,0)) <=> CAST(CAST(1 AS BOOLEAN) AS DECIMAL(20,0))):boolean> +-- !query 696 output +true + + +-- !query 697 +SELECT cast(1 as decimal(3, 0)) <=> cast('2017-12-11 
09:30:00.0' as timestamp) FROM t +-- !query 697 schema +struct<> +-- !query 697 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 + + +-- !query 698 +SELECT cast(1 as decimal(5, 0)) <=> cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 698 schema +struct<> +-- !query 698 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 + + +-- !query 699 +SELECT cast(1 as decimal(10, 0)) <=> cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 699 schema +struct<> +-- !query 699 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 + + +-- !query 700 +SELECT cast(1 as decimal(20, 0)) <=> cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 700 schema +struct<> +-- !query 700 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 + + +-- !query 701 +SELECT cast(1 as decimal(3, 0)) <=> cast('2017-12-11 09:30:00' as date) FROM t +-- !query 701 schema +struct<> +-- !query 701 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) <=> CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) <=> CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 + + +-- !query 702 +SELECT cast(1 as decimal(5, 0)) <=> cast('2017-12-11 09:30:00' as date) FROM t +-- !query 702 schema +struct<> +-- !query 702 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) <=> CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) <=> CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 + + +-- !query 703 +SELECT cast(1 as decimal(10, 0)) <=> cast('2017-12-11 09:30:00' as date) FROM t +-- !query 703 schema +struct<> +-- !query 703 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) <=> CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) <=> CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 + + +-- !query 704 +SELECT cast(1 as decimal(20, 0)) <=> cast('2017-12-11 09:30:00' as date) FROM t +-- !query 704 schema +struct<> +-- !query 704 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) <=> CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) <=> CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 + + +-- !query 705 +SELECT cast(1 as tinyint) < cast(1 as 
decimal(3, 0)) FROM t +-- !query 705 schema +struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) < CAST(1 AS DECIMAL(3,0))):boolean> +-- !query 705 output +false + + +-- !query 706 +SELECT cast(1 as tinyint) < cast(1 as decimal(5, 0)) FROM t +-- !query 706 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)) < CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> +-- !query 706 output +false + + +-- !query 707 +SELECT cast(1 as tinyint) < cast(1 as decimal(10, 0)) FROM t +-- !query 707 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) < CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 707 output +false + + +-- !query 708 +SELECT cast(1 as tinyint) < cast(1 as decimal(20, 0)) FROM t +-- !query 708 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)) < CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 708 output +false + + +-- !query 709 +SELECT cast(1 as smallint) < cast(1 as decimal(3, 0)) FROM t +-- !query 709 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)) < CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> +-- !query 709 output +false + + +-- !query 710 +SELECT cast(1 as smallint) < cast(1 as decimal(5, 0)) FROM t +-- !query 710 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) < CAST(1 AS DECIMAL(5,0))):boolean> +-- !query 710 output +false + + +-- !query 711 +SELECT cast(1 as smallint) < cast(1 as decimal(10, 0)) FROM t +-- !query 711 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) < CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 711 output +false + + +-- !query 712 +SELECT cast(1 as smallint) < cast(1 as decimal(20, 0)) FROM t +-- !query 712 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)) < CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 712 output +false + + +-- !query 713 +SELECT cast(1 as int) < cast(1 as decimal(3, 0)) FROM t +-- !query 713 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) < CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> +-- !query 713 output +false + + +-- !query 714 +SELECT cast(1 as int) < cast(1 as decimal(5, 0)) FROM t +-- !query 714 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) < CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> +-- !query 714 output +false + + +-- !query 715 +SELECT cast(1 as int) < cast(1 as decimal(10, 0)) FROM t +-- !query 715 schema +struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) < CAST(1 AS DECIMAL(10,0))):boolean> +-- !query 715 output +false + + +-- !query 716 +SELECT cast(1 as int) < cast(1 as decimal(20, 0)) FROM t +-- !query 716 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)) < CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 716 output +false + + +-- !query 717 +SELECT cast(1 as bigint) < cast(1 as decimal(3, 0)) FROM t +-- !query 717 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) < CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> +-- !query 717 output +false + + +-- !query 718 +SELECT cast(1 as bigint) < cast(1 as decimal(5, 0)) FROM t +-- !query 718 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) < CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> +-- !query 718 output +false + + +-- !query 719 +SELECT cast(1 as bigint) < cast(1 as decimal(10, 0)) FROM t 
+-- !query 719 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) < CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> +-- !query 719 output +false + + +-- !query 720 +SELECT cast(1 as bigint) < cast(1 as decimal(20, 0)) FROM t +-- !query 720 schema +struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) < CAST(1 AS DECIMAL(20,0))):boolean> +-- !query 720 output +false + + +-- !query 721 +SELECT cast(1 as float) < cast(1 as decimal(3, 0)) FROM t +-- !query 721 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) < CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> +-- !query 721 output +false + + +-- !query 722 +SELECT cast(1 as float) < cast(1 as decimal(5, 0)) FROM t +-- !query 722 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) < CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> +-- !query 722 output +false + + +-- !query 723 +SELECT cast(1 as float) < cast(1 as decimal(10, 0)) FROM t +-- !query 723 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) < CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 723 output +false + + +-- !query 724 +SELECT cast(1 as float) < cast(1 as decimal(20, 0)) FROM t +-- !query 724 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) < CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> +-- !query 724 output +false + + +-- !query 725 +SELECT cast(1 as double) < cast(1 as decimal(3, 0)) FROM t +-- !query 725 schema +struct<(CAST(1 AS DOUBLE) < CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> +-- !query 725 output +false + + +-- !query 726 +SELECT cast(1 as double) < cast(1 as decimal(5, 0)) FROM t +-- !query 726 schema +struct<(CAST(1 AS DOUBLE) < CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> +-- !query 726 output +false + + +-- !query 727 +SELECT cast(1 as double) < cast(1 as decimal(10, 0)) FROM t +-- !query 727 schema +struct<(CAST(1 AS DOUBLE) < CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 727 output +false + + +-- !query 728 +SELECT cast(1 as double) < cast(1 as decimal(20, 0)) FROM t +-- !query 728 schema +struct<(CAST(1 AS DOUBLE) < CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> +-- !query 728 output +false + + +-- !query 729 +SELECT cast(1 as decimal(10, 0)) < cast(1 as decimal(3, 0)) FROM t +-- !query 729 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) < CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> +-- !query 729 output +false + + +-- !query 730 +SELECT cast(1 as decimal(10, 0)) < cast(1 as decimal(5, 0)) FROM t +-- !query 730 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) < CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> +-- !query 730 output +false + + +-- !query 731 +SELECT cast(1 as decimal(10, 0)) < cast(1 as decimal(10, 0)) FROM t +-- !query 731 schema +struct<(CAST(1 AS DECIMAL(10,0)) < CAST(1 AS DECIMAL(10,0))):boolean> +-- !query 731 output +false + + +-- !query 732 +SELECT cast(1 as decimal(10, 0)) < cast(1 as decimal(20, 0)) FROM t +-- !query 732 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) < CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 732 output +false + + +-- !query 733 +SELECT cast('1' as binary) < cast(1 as decimal(3, 0)) FROM t +-- !query 733 schema +struct<> +-- !query 733 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) < CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) < CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 + + +-- !query 734 +SELECT cast('1' as binary) < cast(1 as 
decimal(5, 0)) FROM t +-- !query 734 schema +struct<> +-- !query 734 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) < CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) < CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 + + +-- !query 735 +SELECT cast('1' as binary) < cast(1 as decimal(10, 0)) FROM t +-- !query 735 schema +struct<> +-- !query 735 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) < CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) < CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 + + +-- !query 736 +SELECT cast('1' as binary) < cast(1 as decimal(20, 0)) FROM t +-- !query 736 schema +struct<> +-- !query 736 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) < CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) < CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 + + +-- !query 737 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) < cast(1 as decimal(3, 0)) FROM t +-- !query 737 schema +struct<> +-- !query 737 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) < CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) < CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 + + +-- !query 738 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) < cast(1 as decimal(5, 0)) FROM t +-- !query 738 schema +struct<> +-- !query 738 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) < CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) < CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 + + +-- !query 739 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) < cast(1 as decimal(10, 0)) FROM t +-- !query 739 schema +struct<> +-- !query 739 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) < CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) < CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 + + +-- !query 740 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) < cast(1 as decimal(20, 0)) FROM t +-- !query 740 schema +struct<> +-- !query 740 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) < CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) < CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 + + +-- !query 741 +SELECT cast('2017-12-11 09:30:00' as date) < cast(1 as decimal(3, 0)) FROM t +-- !query 741 schema +struct<> +-- !query 741 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) < CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) < CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 + + +-- !query 742 +SELECT cast('2017-12-11 09:30:00' as date) < cast(1 as decimal(5, 0)) FROM t +-- !query 742 schema +struct<> +-- !query 742 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS 
DATE) < CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) < CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 + + +-- !query 743 +SELECT cast('2017-12-11 09:30:00' as date) < cast(1 as decimal(10, 0)) FROM t +-- !query 743 schema +struct<> +-- !query 743 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) < CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) < CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 + + +-- !query 744 +SELECT cast('2017-12-11 09:30:00' as date) < cast(1 as decimal(20, 0)) FROM t +-- !query 744 schema +struct<> +-- !query 744 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) < CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) < CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 + + +-- !query 745 +SELECT cast(1 as decimal(3, 0)) < cast(1 as tinyint) FROM t +-- !query 745 schema +struct<(CAST(1 AS DECIMAL(3,0)) < CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):boolean> +-- !query 745 output +false + + +-- !query 746 +SELECT cast(1 as decimal(5, 0)) < cast(1 as tinyint) FROM t +-- !query 746 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)) < CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> +-- !query 746 output +false + + +-- !query 747 +SELECT cast(1 as decimal(10, 0)) < cast(1 as tinyint) FROM t +-- !query 747 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) < CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> +-- !query 747 output +false + + +-- !query 748 +SELECT cast(1 as decimal(20, 0)) < cast(1 as tinyint) FROM t +-- !query 748 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) < CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> +-- !query 748 output +false + + +-- !query 749 +SELECT cast(1 as decimal(3, 0)) < cast(1 as smallint) FROM t +-- !query 749 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)) < CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> +-- !query 749 output +false + + +-- !query 750 +SELECT cast(1 as decimal(5, 0)) < cast(1 as smallint) FROM t +-- !query 750 schema +struct<(CAST(1 AS DECIMAL(5,0)) < CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):boolean> +-- !query 750 output +false + + +-- !query 751 +SELECT cast(1 as decimal(10, 0)) < cast(1 as smallint) FROM t +-- !query 751 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) < CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> +-- !query 751 output +false + + +-- !query 752 +SELECT cast(1 as decimal(20, 0)) < cast(1 as smallint) FROM t +-- !query 752 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) < CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> +-- !query 752 output +false + + +-- !query 753 +SELECT cast(1 as decimal(3, 0)) < cast(1 as int) FROM t +-- !query 753 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) < CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 753 output +false + + +-- !query 754 +SELECT cast(1 as decimal(5, 0)) < cast(1 as int) FROM t +-- !query 754 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) < CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- 
!query 754 output +false + + +-- !query 755 +SELECT cast(1 as decimal(10, 0)) < cast(1 as int) FROM t +-- !query 755 schema +struct<(CAST(1 AS DECIMAL(10,0)) < CAST(CAST(1 AS INT) AS DECIMAL(10,0))):boolean> +-- !query 755 output +false + + +-- !query 756 +SELECT cast(1 as decimal(20, 0)) < cast(1 as int) FROM t +-- !query 756 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) < CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> +-- !query 756 output +false + + +-- !query 757 +SELECT cast(1 as decimal(3, 0)) < cast(1 as bigint) FROM t +-- !query 757 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)) < CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 757 output +false + + +-- !query 758 +SELECT cast(1 as decimal(5, 0)) < cast(1 as bigint) FROM t +-- !query 758 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)) < CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 758 output +false + + +-- !query 759 +SELECT cast(1 as decimal(10, 0)) < cast(1 as bigint) FROM t +-- !query 759 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) < CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 759 output +false + + +-- !query 760 +SELECT cast(1 as decimal(20, 0)) < cast(1 as bigint) FROM t +-- !query 760 schema +struct<(CAST(1 AS DECIMAL(20,0)) < CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):boolean> +-- !query 760 output +false + + +-- !query 761 +SELECT cast(1 as decimal(3, 0)) < cast(1 as float) FROM t +-- !query 761 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) < CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 761 output +false + + +-- !query 762 +SELECT cast(1 as decimal(5, 0)) < cast(1 as float) FROM t +-- !query 762 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) < CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 762 output +false + + +-- !query 763 +SELECT cast(1 as decimal(10, 0)) < cast(1 as float) FROM t +-- !query 763 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) < CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 763 output +false + + +-- !query 764 +SELECT cast(1 as decimal(20, 0)) < cast(1 as float) FROM t +-- !query 764 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) < CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 764 output +false + + +-- !query 765 +SELECT cast(1 as decimal(3, 0)) < cast(1 as double) FROM t +-- !query 765 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) < CAST(1 AS DOUBLE)):boolean> +-- !query 765 output +false + + +-- !query 766 +SELECT cast(1 as decimal(5, 0)) < cast(1 as double) FROM t +-- !query 766 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) < CAST(1 AS DOUBLE)):boolean> +-- !query 766 output +false + + +-- !query 767 +SELECT cast(1 as decimal(10, 0)) < cast(1 as double) FROM t +-- !query 767 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) < CAST(1 AS DOUBLE)):boolean> +-- !query 767 output +false + + +-- !query 768 +SELECT cast(1 as decimal(20, 0)) < cast(1 as double) FROM t +-- !query 768 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) < CAST(1 AS DOUBLE)):boolean> +-- !query 768 output +false + + +-- !query 769 +SELECT cast(1 as decimal(3, 0)) < cast(1 as decimal(10, 0)) FROM t +-- !query 769 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) < CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 769 output +false + + +-- !query 770 +SELECT cast(1 as decimal(5, 0)) < cast(1 as 
decimal(10, 0)) FROM t +-- !query 770 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) < CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 770 output +false + + +-- !query 771 +SELECT cast(1 as decimal(10, 0)) < cast(1 as decimal(10, 0)) FROM t +-- !query 771 schema +struct<(CAST(1 AS DECIMAL(10,0)) < CAST(1 AS DECIMAL(10,0))):boolean> +-- !query 771 output +false + + +-- !query 772 +SELECT cast(1 as decimal(20, 0)) < cast(1 as decimal(10, 0)) FROM t +-- !query 772 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) < CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> +-- !query 772 output +false + + +-- !query 773 +SELECT cast(1 as decimal(3, 0)) < cast(1 as string) FROM t +-- !query 773 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) < CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 773 output +false + + +-- !query 774 +SELECT cast(1 as decimal(5, 0)) < cast(1 as string) FROM t +-- !query 774 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) < CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 774 output +false + + +-- !query 775 +SELECT cast(1 as decimal(10, 0)) < cast(1 as string) FROM t +-- !query 775 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) < CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 775 output +false + + +-- !query 776 +SELECT cast(1 as decimal(20, 0)) < cast(1 as string) FROM t +-- !query 776 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) < CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 776 output +false + + +-- !query 777 +SELECT cast(1 as decimal(3, 0)) < cast('1' as binary) FROM t +-- !query 777 schema +struct<> +-- !query 777 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) < CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) < CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 + + +-- !query 778 +SELECT cast(1 as decimal(5, 0)) < cast('1' as binary) FROM t +-- !query 778 schema +struct<> +-- !query 778 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) < CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) < CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 + + +-- !query 779 +SELECT cast(1 as decimal(10, 0)) < cast('1' as binary) FROM t +-- !query 779 schema +struct<> +-- !query 779 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) < CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) < CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 + + +-- !query 780 +SELECT cast(1 as decimal(20, 0)) < cast('1' as binary) FROM t +-- !query 780 schema +struct<> +-- !query 780 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) < CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) < CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 + + +-- !query 781 +SELECT cast(1 as decimal(3, 0)) < cast(1 as boolean) FROM t +-- !query 781 schema +struct<> +-- !query 781 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) < CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) < CAST(1 AS BOOLEAN))' (decimal(3,0) and boolean).; line 1 pos 7 + + +-- !query 782 +SELECT cast(1 as decimal(5, 0)) < cast(1 as boolean) FROM t +-- !query 782 
schema +struct<> +-- !query 782 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) < CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) < CAST(1 AS BOOLEAN))' (decimal(5,0) and boolean).; line 1 pos 7 + + +-- !query 783 +SELECT cast(1 as decimal(10, 0)) < cast(1 as boolean) FROM t +-- !query 783 schema +struct<> +-- !query 783 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) < CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) < CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 + + +-- !query 784 +SELECT cast(1 as decimal(20, 0)) < cast(1 as boolean) FROM t +-- !query 784 schema +struct<> +-- !query 784 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) < CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) < CAST(1 AS BOOLEAN))' (decimal(20,0) and boolean).; line 1 pos 7 + + +-- !query 785 +SELECT cast(1 as decimal(3, 0)) < cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 785 schema +struct<> +-- !query 785 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) < CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) < CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 + + +-- !query 786 +SELECT cast(1 as decimal(5, 0)) < cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 786 schema +struct<> +-- !query 786 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) < CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) < CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 + + +-- !query 787 +SELECT cast(1 as decimal(10, 0)) < cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 787 schema +struct<> +-- !query 787 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) < CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) < CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 + + +-- !query 788 +SELECT cast(1 as decimal(20, 0)) < cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 788 schema +struct<> +-- !query 788 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) < CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) < CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 + + +-- !query 789 +SELECT cast(1 as decimal(3, 0)) < cast('2017-12-11 09:30:00' as date) FROM t +-- !query 789 schema +struct<> +-- !query 789 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) < CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) < CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 + + +-- !query 790 +SELECT cast(1 as decimal(5, 0)) < cast('2017-12-11 09:30:00' as date) FROM t +-- !query 790 schema +struct<> +-- !query 790 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) < CAST('2017-12-11 09:30:00' AS DATE))' due to 
data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) < CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 + + +-- !query 791 +SELECT cast(1 as decimal(10, 0)) < cast('2017-12-11 09:30:00' as date) FROM t +-- !query 791 schema +struct<> +-- !query 791 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) < CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) < CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 + + +-- !query 792 +SELECT cast(1 as decimal(20, 0)) < cast('2017-12-11 09:30:00' as date) FROM t +-- !query 792 schema +struct<> +-- !query 792 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) < CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) < CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 + + +-- !query 793 +SELECT cast(1 as tinyint) <= cast(1 as decimal(3, 0)) FROM t +-- !query 793 schema +struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) <= CAST(1 AS DECIMAL(3,0))):boolean> +-- !query 793 output +true + + +-- !query 794 +SELECT cast(1 as tinyint) <= cast(1 as decimal(5, 0)) FROM t +-- !query 794 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)) <= CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> +-- !query 794 output +true + + +-- !query 795 +SELECT cast(1 as tinyint) <= cast(1 as decimal(10, 0)) FROM t +-- !query 795 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 795 output +true + + +-- !query 796 +SELECT cast(1 as tinyint) <= cast(1 as decimal(20, 0)) FROM t +-- !query 796 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)) <= CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 796 output +true + + +-- !query 797 +SELECT cast(1 as smallint) <= cast(1 as decimal(3, 0)) FROM t +-- !query 797 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)) <= CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> +-- !query 797 output +true + + +-- !query 798 +SELECT cast(1 as smallint) <= cast(1 as decimal(5, 0)) FROM t +-- !query 798 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) <= CAST(1 AS DECIMAL(5,0))):boolean> +-- !query 798 output +true + + +-- !query 799 +SELECT cast(1 as smallint) <= cast(1 as decimal(10, 0)) FROM t +-- !query 799 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 799 output +true + + +-- !query 800 +SELECT cast(1 as smallint) <= cast(1 as decimal(20, 0)) FROM t +-- !query 800 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)) <= CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 800 output +true + + +-- !query 801 +SELECT cast(1 as int) <= cast(1 as decimal(3, 0)) FROM t +-- !query 801 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) <= CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> +-- !query 801 output +true + + +-- !query 802 +SELECT cast(1 as int) <= cast(1 as decimal(5, 0)) FROM t +-- !query 802 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) <= CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> +-- !query 802 output +true + + +-- 
!query 803 +SELECT cast(1 as int) <= cast(1 as decimal(10, 0)) FROM t +-- !query 803 schema +struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) <= CAST(1 AS DECIMAL(10,0))):boolean> +-- !query 803 output +true + + +-- !query 804 +SELECT cast(1 as int) <= cast(1 as decimal(20, 0)) FROM t +-- !query 804 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)) <= CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 804 output +true + + +-- !query 805 +SELECT cast(1 as bigint) <= cast(1 as decimal(3, 0)) FROM t +-- !query 805 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) <= CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> +-- !query 805 output +true + + +-- !query 806 +SELECT cast(1 as bigint) <= cast(1 as decimal(5, 0)) FROM t +-- !query 806 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) <= CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> +-- !query 806 output +true + + +-- !query 807 +SELECT cast(1 as bigint) <= cast(1 as decimal(10, 0)) FROM t +-- !query 807 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> +-- !query 807 output +true + + +-- !query 808 +SELECT cast(1 as bigint) <= cast(1 as decimal(20, 0)) FROM t +-- !query 808 schema +struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) <= CAST(1 AS DECIMAL(20,0))):boolean> +-- !query 808 output +true + + +-- !query 809 +SELECT cast(1 as float) <= cast(1 as decimal(3, 0)) FROM t +-- !query 809 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> +-- !query 809 output +true + + +-- !query 810 +SELECT cast(1 as float) <= cast(1 as decimal(5, 0)) FROM t +-- !query 810 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> +-- !query 810 output +true + + +-- !query 811 +SELECT cast(1 as float) <= cast(1 as decimal(10, 0)) FROM t +-- !query 811 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 811 output +true + + +-- !query 812 +SELECT cast(1 as float) <= cast(1 as decimal(20, 0)) FROM t +-- !query 812 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> +-- !query 812 output +true + + +-- !query 813 +SELECT cast(1 as double) <= cast(1 as decimal(3, 0)) FROM t +-- !query 813 schema +struct<(CAST(1 AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> +-- !query 813 output +true + + +-- !query 814 +SELECT cast(1 as double) <= cast(1 as decimal(5, 0)) FROM t +-- !query 814 schema +struct<(CAST(1 AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> +-- !query 814 output +true + + +-- !query 815 +SELECT cast(1 as double) <= cast(1 as decimal(10, 0)) FROM t +-- !query 815 schema +struct<(CAST(1 AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 815 output +true + + +-- !query 816 +SELECT cast(1 as double) <= cast(1 as decimal(20, 0)) FROM t +-- !query 816 schema +struct<(CAST(1 AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> +-- !query 816 output +true + + +-- !query 817 +SELECT cast(1 as decimal(10, 0)) <= cast(1 as decimal(3, 0)) FROM t +-- !query 817 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) <= CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> +-- !query 817 output +true + + +-- !query 818 +SELECT cast(1 as decimal(10, 0)) <= cast(1 as decimal(5, 0)) 
FROM t +-- !query 818 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) <= CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> +-- !query 818 output +true + + +-- !query 819 +SELECT cast(1 as decimal(10, 0)) <= cast(1 as decimal(10, 0)) FROM t +-- !query 819 schema +struct<(CAST(1 AS DECIMAL(10,0)) <= CAST(1 AS DECIMAL(10,0))):boolean> +-- !query 819 output +true + + +-- !query 820 +SELECT cast(1 as decimal(10, 0)) <= cast(1 as decimal(20, 0)) FROM t +-- !query 820 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) <= CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 820 output +true + + +-- !query 821 +SELECT cast('1' as binary) <= cast(1 as decimal(3, 0)) FROM t +-- !query 821 schema +struct<> +-- !query 821 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) <= CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) <= CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 + + +-- !query 822 +SELECT cast('1' as binary) <= cast(1 as decimal(5, 0)) FROM t +-- !query 822 schema +struct<> +-- !query 822 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) <= CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) <= CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 + + +-- !query 823 +SELECT cast('1' as binary) <= cast(1 as decimal(10, 0)) FROM t +-- !query 823 schema +struct<> +-- !query 823 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) <= CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) <= CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 + + +-- !query 824 +SELECT cast('1' as binary) <= cast(1 as decimal(20, 0)) FROM t +-- !query 824 schema +struct<> +-- !query 824 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) <= CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) <= CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 + + +-- !query 825 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <= cast(1 as decimal(3, 0)) FROM t +-- !query 825 schema +struct<> +-- !query 825 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <= CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <= CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 + + +-- !query 826 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <= cast(1 as decimal(5, 0)) FROM t +-- !query 826 schema +struct<> +-- !query 826 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <= CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <= CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 + + +-- !query 827 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <= cast(1 as decimal(10, 0)) FROM t +-- !query 827 schema +struct<> +-- !query 827 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <= CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <= CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 + + +-- !query 
828 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <= cast(1 as decimal(20, 0)) FROM t +-- !query 828 schema +struct<> +-- !query 828 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <= CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <= CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 + + +-- !query 829 +SELECT cast('2017-12-11 09:30:00' as date) <= cast(1 as decimal(3, 0)) FROM t +-- !query 829 schema +struct<> +-- !query 829 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) <= CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) <= CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 + + +-- !query 830 +SELECT cast('2017-12-11 09:30:00' as date) <= cast(1 as decimal(5, 0)) FROM t +-- !query 830 schema +struct<> +-- !query 830 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) <= CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) <= CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 + + +-- !query 831 +SELECT cast('2017-12-11 09:30:00' as date) <= cast(1 as decimal(10, 0)) FROM t +-- !query 831 schema +struct<> +-- !query 831 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) <= CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) <= CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 + + +-- !query 832 +SELECT cast('2017-12-11 09:30:00' as date) <= cast(1 as decimal(20, 0)) FROM t +-- !query 832 schema +struct<> +-- !query 832 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) <= CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) <= CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 + + +-- !query 833 +SELECT cast(1 as decimal(3, 0)) <= cast(1 as tinyint) FROM t +-- !query 833 schema +struct<(CAST(1 AS DECIMAL(3,0)) <= CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):boolean> +-- !query 833 output +true + + +-- !query 834 +SELECT cast(1 as decimal(5, 0)) <= cast(1 as tinyint) FROM t +-- !query 834 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)) <= CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> +-- !query 834 output +true + + +-- !query 835 +SELECT cast(1 as decimal(10, 0)) <= cast(1 as tinyint) FROM t +-- !query 835 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) <= CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> +-- !query 835 output +true + + +-- !query 836 +SELECT cast(1 as decimal(20, 0)) <= cast(1 as tinyint) FROM t +-- !query 836 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) <= CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> +-- !query 836 output +true + + +-- !query 837 +SELECT cast(1 as decimal(3, 0)) <= cast(1 as smallint) FROM t +-- !query 837 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)) <= CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> +-- !query 837 output +true + + +-- !query 838 +SELECT cast(1 as decimal(5, 0)) <= cast(1 as smallint) FROM t +-- !query 838 schema +struct<(CAST(1 
AS DECIMAL(5,0)) <= CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):boolean> +-- !query 838 output +true + + +-- !query 839 +SELECT cast(1 as decimal(10, 0)) <= cast(1 as smallint) FROM t +-- !query 839 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) <= CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> +-- !query 839 output +true + + +-- !query 840 +SELECT cast(1 as decimal(20, 0)) <= cast(1 as smallint) FROM t +-- !query 840 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) <= CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> +-- !query 840 output +true + + +-- !query 841 +SELECT cast(1 as decimal(3, 0)) <= cast(1 as int) FROM t +-- !query 841 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) <= CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 841 output +true + + +-- !query 842 +SELECT cast(1 as decimal(5, 0)) <= cast(1 as int) FROM t +-- !query 842 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) <= CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 842 output +true + + +-- !query 843 +SELECT cast(1 as decimal(10, 0)) <= cast(1 as int) FROM t +-- !query 843 schema +struct<(CAST(1 AS DECIMAL(10,0)) <= CAST(CAST(1 AS INT) AS DECIMAL(10,0))):boolean> +-- !query 843 output +true + + +-- !query 844 +SELECT cast(1 as decimal(20, 0)) <= cast(1 as int) FROM t +-- !query 844 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) <= CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> +-- !query 844 output +true + + +-- !query 845 +SELECT cast(1 as decimal(3, 0)) <= cast(1 as bigint) FROM t +-- !query 845 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)) <= CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 845 output +true + + +-- !query 846 +SELECT cast(1 as decimal(5, 0)) <= cast(1 as bigint) FROM t +-- !query 846 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)) <= CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 846 output +true + + +-- !query 847 +SELECT cast(1 as decimal(10, 0)) <= cast(1 as bigint) FROM t +-- !query 847 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) <= CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 847 output +true + + +-- !query 848 +SELECT cast(1 as decimal(20, 0)) <= cast(1 as bigint) FROM t +-- !query 848 schema +struct<(CAST(1 AS DECIMAL(20,0)) <= CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):boolean> +-- !query 848 output +true + + +-- !query 849 +SELECT cast(1 as decimal(3, 0)) <= cast(1 as float) FROM t +-- !query 849 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) <= CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 849 output +true + + +-- !query 850 +SELECT cast(1 as decimal(5, 0)) <= cast(1 as float) FROM t +-- !query 850 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) <= CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 850 output +true + + +-- !query 851 +SELECT cast(1 as decimal(10, 0)) <= cast(1 as float) FROM t +-- !query 851 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) <= CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 851 output +true + + +-- !query 852 +SELECT cast(1 as decimal(20, 0)) <= cast(1 as float) FROM t +-- !query 852 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) <= CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 852 output +true + + +-- !query 853 
+SELECT cast(1 as decimal(3, 0)) <= cast(1 as double) FROM t +-- !query 853 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) <= CAST(1 AS DOUBLE)):boolean> +-- !query 853 output +true + + +-- !query 854 +SELECT cast(1 as decimal(5, 0)) <= cast(1 as double) FROM t +-- !query 854 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) <= CAST(1 AS DOUBLE)):boolean> +-- !query 854 output +true + + +-- !query 855 +SELECT cast(1 as decimal(10, 0)) <= cast(1 as double) FROM t +-- !query 855 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) <= CAST(1 AS DOUBLE)):boolean> +-- !query 855 output +true + + +-- !query 856 +SELECT cast(1 as decimal(20, 0)) <= cast(1 as double) FROM t +-- !query 856 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) <= CAST(1 AS DOUBLE)):boolean> +-- !query 856 output +true + + +-- !query 857 +SELECT cast(1 as decimal(3, 0)) <= cast(1 as decimal(10, 0)) FROM t +-- !query 857 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 857 output +true + + +-- !query 858 +SELECT cast(1 as decimal(5, 0)) <= cast(1 as decimal(10, 0)) FROM t +-- !query 858 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 858 output +true + + +-- !query 859 +SELECT cast(1 as decimal(10, 0)) <= cast(1 as decimal(10, 0)) FROM t +-- !query 859 schema +struct<(CAST(1 AS DECIMAL(10,0)) <= CAST(1 AS DECIMAL(10,0))):boolean> +-- !query 859 output +true + + +-- !query 860 +SELECT cast(1 as decimal(20, 0)) <= cast(1 as decimal(10, 0)) FROM t +-- !query 860 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> +-- !query 860 output +true + + +-- !query 861 +SELECT cast(1 as decimal(3, 0)) <= cast(1 as string) FROM t +-- !query 861 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) <= CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 861 output +true + + +-- !query 862 +SELECT cast(1 as decimal(5, 0)) <= cast(1 as string) FROM t +-- !query 862 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) <= CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 862 output +true + + +-- !query 863 +SELECT cast(1 as decimal(10, 0)) <= cast(1 as string) FROM t +-- !query 863 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) <= CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 863 output +true + + +-- !query 864 +SELECT cast(1 as decimal(20, 0)) <= cast(1 as string) FROM t +-- !query 864 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) <= CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 864 output +true + + +-- !query 865 +SELECT cast(1 as decimal(3, 0)) <= cast('1' as binary) FROM t +-- !query 865 schema +struct<> +-- !query 865 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) <= CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) <= CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 + + +-- !query 866 +SELECT cast(1 as decimal(5, 0)) <= cast('1' as binary) FROM t +-- !query 866 schema +struct<> +-- !query 866 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) <= CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) <= CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 + + +-- !query 867 +SELECT cast(1 as decimal(10, 0)) <= cast('1' as binary) FROM 
t +-- !query 867 schema +struct<> +-- !query 867 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) <= CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) <= CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 + + +-- !query 868 +SELECT cast(1 as decimal(20, 0)) <= cast('1' as binary) FROM t +-- !query 868 schema +struct<> +-- !query 868 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) <= CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) <= CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 + + +-- !query 869 +SELECT cast(1 as decimal(3, 0)) <= cast(1 as boolean) FROM t +-- !query 869 schema +struct<> +-- !query 869 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) <= CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) <= CAST(1 AS BOOLEAN))' (decimal(3,0) and boolean).; line 1 pos 7 + + +-- !query 870 +SELECT cast(1 as decimal(5, 0)) <= cast(1 as boolean) FROM t +-- !query 870 schema +struct<> +-- !query 870 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) <= CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) <= CAST(1 AS BOOLEAN))' (decimal(5,0) and boolean).; line 1 pos 7 + + +-- !query 871 +SELECT cast(1 as decimal(10, 0)) <= cast(1 as boolean) FROM t +-- !query 871 schema +struct<> +-- !query 871 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) <= CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) <= CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 + + +-- !query 872 +SELECT cast(1 as decimal(20, 0)) <= cast(1 as boolean) FROM t +-- !query 872 schema +struct<> +-- !query 872 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) <= CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) <= CAST(1 AS BOOLEAN))' (decimal(20,0) and boolean).; line 1 pos 7 + + +-- !query 873 +SELECT cast(1 as decimal(3, 0)) <= cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 873 schema +struct<> +-- !query 873 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) <= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) <= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 + + +-- !query 874 +SELECT cast(1 as decimal(5, 0)) <= cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 874 schema +struct<> +-- !query 874 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) <= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) <= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 + + +-- !query 875 +SELECT cast(1 as decimal(10, 0)) <= cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 875 schema +struct<> +-- !query 875 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) <= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) <= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and 
timestamp).; line 1 pos 7 + + +-- !query 876 +SELECT cast(1 as decimal(20, 0)) <= cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 876 schema +struct<> +-- !query 876 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) <= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) <= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 + + +-- !query 877 +SELECT cast(1 as decimal(3, 0)) <= cast('2017-12-11 09:30:00' as date) FROM t +-- !query 877 schema +struct<> +-- !query 877 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) <= CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) <= CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 + + +-- !query 878 +SELECT cast(1 as decimal(5, 0)) <= cast('2017-12-11 09:30:00' as date) FROM t +-- !query 878 schema +struct<> +-- !query 878 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) <= CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) <= CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 + + +-- !query 879 +SELECT cast(1 as decimal(10, 0)) <= cast('2017-12-11 09:30:00' as date) FROM t +-- !query 879 schema +struct<> +-- !query 879 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) <= CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) <= CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 + + +-- !query 880 +SELECT cast(1 as decimal(20, 0)) <= cast('2017-12-11 09:30:00' as date) FROM t +-- !query 880 schema +struct<> +-- !query 880 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) <= CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) <= CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 + + +-- !query 881 +SELECT cast(1 as tinyint) > cast(1 as decimal(3, 0)) FROM t +-- !query 881 schema +struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) > CAST(1 AS DECIMAL(3,0))):boolean> +-- !query 881 output +false + + +-- !query 882 +SELECT cast(1 as tinyint) > cast(1 as decimal(5, 0)) FROM t +-- !query 882 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)) > CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> +-- !query 882 output +false + + +-- !query 883 +SELECT cast(1 as tinyint) > cast(1 as decimal(10, 0)) FROM t +-- !query 883 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) > CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 883 output +false + + +-- !query 884 +SELECT cast(1 as tinyint) > cast(1 as decimal(20, 0)) FROM t +-- !query 884 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)) > CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 884 output +false + + +-- !query 885 +SELECT cast(1 as smallint) > cast(1 as decimal(3, 0)) FROM t +-- !query 885 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)) > CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> +-- !query 885 output +false + + +-- !query 886 +SELECT cast(1 as smallint) > cast(1 as decimal(5, 0)) FROM t +-- 
!query 886 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) > CAST(1 AS DECIMAL(5,0))):boolean> +-- !query 886 output +false + + +-- !query 887 +SELECT cast(1 as smallint) > cast(1 as decimal(10, 0)) FROM t +-- !query 887 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) > CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 887 output +false + + +-- !query 888 +SELECT cast(1 as smallint) > cast(1 as decimal(20, 0)) FROM t +-- !query 888 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)) > CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 888 output +false + + +-- !query 889 +SELECT cast(1 as int) > cast(1 as decimal(3, 0)) FROM t +-- !query 889 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) > CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> +-- !query 889 output +false + + +-- !query 890 +SELECT cast(1 as int) > cast(1 as decimal(5, 0)) FROM t +-- !query 890 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) > CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> +-- !query 890 output +false + + +-- !query 891 +SELECT cast(1 as int) > cast(1 as decimal(10, 0)) FROM t +-- !query 891 schema +struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) > CAST(1 AS DECIMAL(10,0))):boolean> +-- !query 891 output +false + + +-- !query 892 +SELECT cast(1 as int) > cast(1 as decimal(20, 0)) FROM t +-- !query 892 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)) > CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 892 output +false + + +-- !query 893 +SELECT cast(1 as bigint) > cast(1 as decimal(3, 0)) FROM t +-- !query 893 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) > CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> +-- !query 893 output +false + + +-- !query 894 +SELECT cast(1 as bigint) > cast(1 as decimal(5, 0)) FROM t +-- !query 894 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) > CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> +-- !query 894 output +false + + +-- !query 895 +SELECT cast(1 as bigint) > cast(1 as decimal(10, 0)) FROM t +-- !query 895 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) > CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> +-- !query 895 output +false + + +-- !query 896 +SELECT cast(1 as bigint) > cast(1 as decimal(20, 0)) FROM t +-- !query 896 schema +struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) > CAST(1 AS DECIMAL(20,0))):boolean> +-- !query 896 output +false + + +-- !query 897 +SELECT cast(1 as float) > cast(1 as decimal(3, 0)) FROM t +-- !query 897 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) > CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> +-- !query 897 output +false + + +-- !query 898 +SELECT cast(1 as float) > cast(1 as decimal(5, 0)) FROM t +-- !query 898 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) > CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> +-- !query 898 output +false + + +-- !query 899 +SELECT cast(1 as float) > cast(1 as decimal(10, 0)) FROM t +-- !query 899 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) > CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 899 output +false + + +-- !query 900 +SELECT cast(1 as float) > cast(1 as decimal(20, 0)) FROM t +-- !query 900 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) > CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> +-- !query 900 output +false + 
+ +-- !query 901 +SELECT cast(1 as double) > cast(1 as decimal(3, 0)) FROM t +-- !query 901 schema +struct<(CAST(1 AS DOUBLE) > CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> +-- !query 901 output +false + + +-- !query 902 +SELECT cast(1 as double) > cast(1 as decimal(5, 0)) FROM t +-- !query 902 schema +struct<(CAST(1 AS DOUBLE) > CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> +-- !query 902 output +false + + +-- !query 903 +SELECT cast(1 as double) > cast(1 as decimal(10, 0)) FROM t +-- !query 903 schema +struct<(CAST(1 AS DOUBLE) > CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 903 output +false + + +-- !query 904 +SELECT cast(1 as double) > cast(1 as decimal(20, 0)) FROM t +-- !query 904 schema +struct<(CAST(1 AS DOUBLE) > CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> +-- !query 904 output +false + + +-- !query 905 +SELECT cast(1 as decimal(10, 0)) > cast(1 as decimal(3, 0)) FROM t +-- !query 905 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) > CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> +-- !query 905 output +false + + +-- !query 906 +SELECT cast(1 as decimal(10, 0)) > cast(1 as decimal(5, 0)) FROM t +-- !query 906 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) > CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> +-- !query 906 output +false + + +-- !query 907 +SELECT cast(1 as decimal(10, 0)) > cast(1 as decimal(10, 0)) FROM t +-- !query 907 schema +struct<(CAST(1 AS DECIMAL(10,0)) > CAST(1 AS DECIMAL(10,0))):boolean> +-- !query 907 output +false + + +-- !query 908 +SELECT cast(1 as decimal(10, 0)) > cast(1 as decimal(20, 0)) FROM t +-- !query 908 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) > CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 908 output +false + + +-- !query 909 +SELECT cast('1' as binary) > cast(1 as decimal(3, 0)) FROM t +-- !query 909 schema +struct<> +-- !query 909 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) > CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) > CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 + + +-- !query 910 +SELECT cast('1' as binary) > cast(1 as decimal(5, 0)) FROM t +-- !query 910 schema +struct<> +-- !query 910 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) > CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) > CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 + + +-- !query 911 +SELECT cast('1' as binary) > cast(1 as decimal(10, 0)) FROM t +-- !query 911 schema +struct<> +-- !query 911 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) > CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) > CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 + + +-- !query 912 +SELECT cast('1' as binary) > cast(1 as decimal(20, 0)) FROM t +-- !query 912 schema +struct<> +-- !query 912 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) > CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) > CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 + + +-- !query 913 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) > cast(1 as decimal(3, 0)) FROM t +-- !query 913 schema +struct<> +-- !query 913 output +org.apache.spark.sql.AnalysisException +cannot resolve 
'(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) > CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) > CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 + + +-- !query 914 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) > cast(1 as decimal(5, 0)) FROM t +-- !query 914 schema +struct<> +-- !query 914 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) > CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) > CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 + + +-- !query 915 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) > cast(1 as decimal(10, 0)) FROM t +-- !query 915 schema +struct<> +-- !query 915 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) > CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) > CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 + + +-- !query 916 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) > cast(1 as decimal(20, 0)) FROM t +-- !query 916 schema +struct<> +-- !query 916 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) > CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) > CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 + + +-- !query 917 +SELECT cast('2017-12-11 09:30:00' as date) > cast(1 as decimal(3, 0)) FROM t +-- !query 917 schema +struct<> +-- !query 917 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) > CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) > CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 + + +-- !query 918 +SELECT cast('2017-12-11 09:30:00' as date) > cast(1 as decimal(5, 0)) FROM t +-- !query 918 schema +struct<> +-- !query 918 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) > CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) > CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 + + +-- !query 919 +SELECT cast('2017-12-11 09:30:00' as date) > cast(1 as decimal(10, 0)) FROM t +-- !query 919 schema +struct<> +-- !query 919 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) > CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) > CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 + + +-- !query 920 +SELECT cast('2017-12-11 09:30:00' as date) > cast(1 as decimal(20, 0)) FROM t +-- !query 920 schema +struct<> +-- !query 920 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) > CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) > CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 + + +-- !query 921 +SELECT cast(1 as decimal(3, 0)) > cast(1 as tinyint) FROM t +-- !query 921 schema +struct<(CAST(1 AS DECIMAL(3,0)) > CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):boolean> +-- !query 921 output +false + + +-- !query 922 +SELECT cast(1 as 
decimal(5, 0)) > cast(1 as tinyint) FROM t +-- !query 922 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)) > CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> +-- !query 922 output +false + + +-- !query 923 +SELECT cast(1 as decimal(10, 0)) > cast(1 as tinyint) FROM t +-- !query 923 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) > CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> +-- !query 923 output +false + + +-- !query 924 +SELECT cast(1 as decimal(20, 0)) > cast(1 as tinyint) FROM t +-- !query 924 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) > CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> +-- !query 924 output +false + + +-- !query 925 +SELECT cast(1 as decimal(3, 0)) > cast(1 as smallint) FROM t +-- !query 925 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)) > CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> +-- !query 925 output +false + + +-- !query 926 +SELECT cast(1 as decimal(5, 0)) > cast(1 as smallint) FROM t +-- !query 926 schema +struct<(CAST(1 AS DECIMAL(5,0)) > CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):boolean> +-- !query 926 output +false + + +-- !query 927 +SELECT cast(1 as decimal(10, 0)) > cast(1 as smallint) FROM t +-- !query 927 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) > CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> +-- !query 927 output +false + + +-- !query 928 +SELECT cast(1 as decimal(20, 0)) > cast(1 as smallint) FROM t +-- !query 928 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) > CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> +-- !query 928 output +false + + +-- !query 929 +SELECT cast(1 as decimal(3, 0)) > cast(1 as int) FROM t +-- !query 929 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) > CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 929 output +false + + +-- !query 930 +SELECT cast(1 as decimal(5, 0)) > cast(1 as int) FROM t +-- !query 930 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) > CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 930 output +false + + +-- !query 931 +SELECT cast(1 as decimal(10, 0)) > cast(1 as int) FROM t +-- !query 931 schema +struct<(CAST(1 AS DECIMAL(10,0)) > CAST(CAST(1 AS INT) AS DECIMAL(10,0))):boolean> +-- !query 931 output +false + + +-- !query 932 +SELECT cast(1 as decimal(20, 0)) > cast(1 as int) FROM t +-- !query 932 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) > CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> +-- !query 932 output +false + + +-- !query 933 +SELECT cast(1 as decimal(3, 0)) > cast(1 as bigint) FROM t +-- !query 933 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)) > CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 933 output +false + + +-- !query 934 +SELECT cast(1 as decimal(5, 0)) > cast(1 as bigint) FROM t +-- !query 934 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)) > CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 934 output +false + + +-- !query 935 +SELECT cast(1 as decimal(10, 0)) > cast(1 as bigint) FROM t +-- !query 935 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) > CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 935 output +false + + +-- !query 
936 +SELECT cast(1 as decimal(20, 0)) > cast(1 as bigint) FROM t +-- !query 936 schema +struct<(CAST(1 AS DECIMAL(20,0)) > CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):boolean> +-- !query 936 output +false + + +-- !query 937 +SELECT cast(1 as decimal(3, 0)) > cast(1 as float) FROM t +-- !query 937 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) > CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 937 output +false + + +-- !query 938 +SELECT cast(1 as decimal(5, 0)) > cast(1 as float) FROM t +-- !query 938 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) > CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 938 output +false + + +-- !query 939 +SELECT cast(1 as decimal(10, 0)) > cast(1 as float) FROM t +-- !query 939 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) > CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 939 output +false + + +-- !query 940 +SELECT cast(1 as decimal(20, 0)) > cast(1 as float) FROM t +-- !query 940 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) > CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 940 output +false + + +-- !query 941 +SELECT cast(1 as decimal(3, 0)) > cast(1 as double) FROM t +-- !query 941 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) > CAST(1 AS DOUBLE)):boolean> +-- !query 941 output +false + + +-- !query 942 +SELECT cast(1 as decimal(5, 0)) > cast(1 as double) FROM t +-- !query 942 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) > CAST(1 AS DOUBLE)):boolean> +-- !query 942 output +false + + +-- !query 943 +SELECT cast(1 as decimal(10, 0)) > cast(1 as double) FROM t +-- !query 943 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) > CAST(1 AS DOUBLE)):boolean> +-- !query 943 output +false + + +-- !query 944 +SELECT cast(1 as decimal(20, 0)) > cast(1 as double) FROM t +-- !query 944 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) > CAST(1 AS DOUBLE)):boolean> +-- !query 944 output +false + + +-- !query 945 +SELECT cast(1 as decimal(3, 0)) > cast(1 as decimal(10, 0)) FROM t +-- !query 945 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) > CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 945 output +false + + +-- !query 946 +SELECT cast(1 as decimal(5, 0)) > cast(1 as decimal(10, 0)) FROM t +-- !query 946 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) > CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 946 output +false + + +-- !query 947 +SELECT cast(1 as decimal(10, 0)) > cast(1 as decimal(10, 0)) FROM t +-- !query 947 schema +struct<(CAST(1 AS DECIMAL(10,0)) > CAST(1 AS DECIMAL(10,0))):boolean> +-- !query 947 output +false + + +-- !query 948 +SELECT cast(1 as decimal(20, 0)) > cast(1 as decimal(10, 0)) FROM t +-- !query 948 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) > CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> +-- !query 948 output +false + + +-- !query 949 +SELECT cast(1 as decimal(3, 0)) > cast(1 as string) FROM t +-- !query 949 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) > CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 949 output +false + + +-- !query 950 +SELECT cast(1 as decimal(5, 0)) > cast(1 as string) FROM t +-- !query 950 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) > CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 950 output +false + + +-- !query 951 +SELECT cast(1 as decimal(10, 0)) > cast(1 as string) FROM t +-- !query 951 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) > CAST(CAST(1 AS STRING) AS 
DOUBLE)):boolean> +-- !query 951 output +false + + +-- !query 952 +SELECT cast(1 as decimal(20, 0)) > cast(1 as string) FROM t +-- !query 952 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) > CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 952 output +false + + +-- !query 953 +SELECT cast(1 as decimal(3, 0)) > cast('1' as binary) FROM t +-- !query 953 schema +struct<> +-- !query 953 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) > CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) > CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 + + +-- !query 954 +SELECT cast(1 as decimal(5, 0)) > cast('1' as binary) FROM t +-- !query 954 schema +struct<> +-- !query 954 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) > CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) > CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 + + +-- !query 955 +SELECT cast(1 as decimal(10, 0)) > cast('1' as binary) FROM t +-- !query 955 schema +struct<> +-- !query 955 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) > CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) > CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 + + +-- !query 956 +SELECT cast(1 as decimal(20, 0)) > cast('1' as binary) FROM t +-- !query 956 schema +struct<> +-- !query 956 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) > CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) > CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 + + +-- !query 957 +SELECT cast(1 as decimal(3, 0)) > cast(1 as boolean) FROM t +-- !query 957 schema +struct<> +-- !query 957 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) > CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) > CAST(1 AS BOOLEAN))' (decimal(3,0) and boolean).; line 1 pos 7 + + +-- !query 958 +SELECT cast(1 as decimal(5, 0)) > cast(1 as boolean) FROM t +-- !query 958 schema +struct<> +-- !query 958 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) > CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) > CAST(1 AS BOOLEAN))' (decimal(5,0) and boolean).; line 1 pos 7 + + +-- !query 959 +SELECT cast(1 as decimal(10, 0)) > cast(1 as boolean) FROM t +-- !query 959 schema +struct<> +-- !query 959 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) > CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) > CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 + + +-- !query 960 +SELECT cast(1 as decimal(20, 0)) > cast(1 as boolean) FROM t +-- !query 960 schema +struct<> +-- !query 960 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) > CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) > CAST(1 AS BOOLEAN))' (decimal(20,0) and boolean).; line 1 pos 7 + + +-- !query 961 +SELECT cast(1 as decimal(3, 0)) > cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 961 schema +struct<> +-- !query 961 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) > 
CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) > CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 + + +-- !query 962 +SELECT cast(1 as decimal(5, 0)) > cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 962 schema +struct<> +-- !query 962 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) > CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) > CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 + + +-- !query 963 +SELECT cast(1 as decimal(10, 0)) > cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 963 schema +struct<> +-- !query 963 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) > CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) > CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 + + +-- !query 964 +SELECT cast(1 as decimal(20, 0)) > cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 964 schema +struct<> +-- !query 964 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) > CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) > CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 + + +-- !query 965 +SELECT cast(1 as decimal(3, 0)) > cast('2017-12-11 09:30:00' as date) FROM t +-- !query 965 schema +struct<> +-- !query 965 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) > CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) > CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 + + +-- !query 966 +SELECT cast(1 as decimal(5, 0)) > cast('2017-12-11 09:30:00' as date) FROM t +-- !query 966 schema +struct<> +-- !query 966 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) > CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) > CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 + + +-- !query 967 +SELECT cast(1 as decimal(10, 0)) > cast('2017-12-11 09:30:00' as date) FROM t +-- !query 967 schema +struct<> +-- !query 967 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) > CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) > CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 + + +-- !query 968 +SELECT cast(1 as decimal(20, 0)) > cast('2017-12-11 09:30:00' as date) FROM t +-- !query 968 schema +struct<> +-- !query 968 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) > CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) > CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 + + +-- !query 969 +SELECT cast(1 as tinyint) >= cast(1 as decimal(3, 0)) FROM t +-- !query 969 schema +struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) >= CAST(1 AS DECIMAL(3,0))):boolean> +-- !query 969 output +true + + +-- !query 970 +SELECT cast(1 as tinyint) >= cast(1 as 
decimal(5, 0)) FROM t +-- !query 970 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)) >= CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> +-- !query 970 output +true + + +-- !query 971 +SELECT cast(1 as tinyint) >= cast(1 as decimal(10, 0)) FROM t +-- !query 971 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 971 output +true + + +-- !query 972 +SELECT cast(1 as tinyint) >= cast(1 as decimal(20, 0)) FROM t +-- !query 972 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)) >= CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 972 output +true + + +-- !query 973 +SELECT cast(1 as smallint) >= cast(1 as decimal(3, 0)) FROM t +-- !query 973 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)) >= CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> +-- !query 973 output +true + + +-- !query 974 +SELECT cast(1 as smallint) >= cast(1 as decimal(5, 0)) FROM t +-- !query 974 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) >= CAST(1 AS DECIMAL(5,0))):boolean> +-- !query 974 output +true + + +-- !query 975 +SELECT cast(1 as smallint) >= cast(1 as decimal(10, 0)) FROM t +-- !query 975 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 975 output +true + + +-- !query 976 +SELECT cast(1 as smallint) >= cast(1 as decimal(20, 0)) FROM t +-- !query 976 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)) >= CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 976 output +true + + +-- !query 977 +SELECT cast(1 as int) >= cast(1 as decimal(3, 0)) FROM t +-- !query 977 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) >= CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> +-- !query 977 output +true + + +-- !query 978 +SELECT cast(1 as int) >= cast(1 as decimal(5, 0)) FROM t +-- !query 978 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) >= CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> +-- !query 978 output +true + + +-- !query 979 +SELECT cast(1 as int) >= cast(1 as decimal(10, 0)) FROM t +-- !query 979 schema +struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) >= CAST(1 AS DECIMAL(10,0))):boolean> +-- !query 979 output +true + + +-- !query 980 +SELECT cast(1 as int) >= cast(1 as decimal(20, 0)) FROM t +-- !query 980 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)) >= CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 980 output +true + + +-- !query 981 +SELECT cast(1 as bigint) >= cast(1 as decimal(3, 0)) FROM t +-- !query 981 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) >= CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> +-- !query 981 output +true + + +-- !query 982 +SELECT cast(1 as bigint) >= cast(1 as decimal(5, 0)) FROM t +-- !query 982 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) >= CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> +-- !query 982 output +true + + +-- !query 983 +SELECT cast(1 as bigint) >= cast(1 as decimal(10, 0)) FROM t +-- !query 983 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> +-- !query 983 output +true + + +-- !query 984 
+SELECT cast(1 as bigint) >= cast(1 as decimal(20, 0)) FROM t +-- !query 984 schema +struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) >= CAST(1 AS DECIMAL(20,0))):boolean> +-- !query 984 output +true + + +-- !query 985 +SELECT cast(1 as float) >= cast(1 as decimal(3, 0)) FROM t +-- !query 985 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> +-- !query 985 output +true + + +-- !query 986 +SELECT cast(1 as float) >= cast(1 as decimal(5, 0)) FROM t +-- !query 986 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> +-- !query 986 output +true + + +-- !query 987 +SELECT cast(1 as float) >= cast(1 as decimal(10, 0)) FROM t +-- !query 987 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 987 output +true + + +-- !query 988 +SELECT cast(1 as float) >= cast(1 as decimal(20, 0)) FROM t +-- !query 988 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> +-- !query 988 output +true + + +-- !query 989 +SELECT cast(1 as double) >= cast(1 as decimal(3, 0)) FROM t +-- !query 989 schema +struct<(CAST(1 AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> +-- !query 989 output +true + + +-- !query 990 +SELECT cast(1 as double) >= cast(1 as decimal(5, 0)) FROM t +-- !query 990 schema +struct<(CAST(1 AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> +-- !query 990 output +true + + +-- !query 991 +SELECT cast(1 as double) >= cast(1 as decimal(10, 0)) FROM t +-- !query 991 schema +struct<(CAST(1 AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 991 output +true + + +-- !query 992 +SELECT cast(1 as double) >= cast(1 as decimal(20, 0)) FROM t +-- !query 992 schema +struct<(CAST(1 AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> +-- !query 992 output +true + + +-- !query 993 +SELECT cast(1 as decimal(10, 0)) >= cast(1 as decimal(3, 0)) FROM t +-- !query 993 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) >= CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> +-- !query 993 output +true + + +-- !query 994 +SELECT cast(1 as decimal(10, 0)) >= cast(1 as decimal(5, 0)) FROM t +-- !query 994 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) >= CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> +-- !query 994 output +true + + +-- !query 995 +SELECT cast(1 as decimal(10, 0)) >= cast(1 as decimal(10, 0)) FROM t +-- !query 995 schema +struct<(CAST(1 AS DECIMAL(10,0)) >= CAST(1 AS DECIMAL(10,0))):boolean> +-- !query 995 output +true + + +-- !query 996 +SELECT cast(1 as decimal(10, 0)) >= cast(1 as decimal(20, 0)) FROM t +-- !query 996 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) >= CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 996 output +true + + +-- !query 997 +SELECT cast('1' as binary) >= cast(1 as decimal(3, 0)) FROM t +-- !query 997 schema +struct<> +-- !query 997 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) >= CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) >= CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 + + +-- !query 998 +SELECT cast('1' as binary) >= cast(1 as decimal(5, 0)) FROM t +-- !query 998 schema +struct<> +-- !query 998 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) >= CAST(1 AS DECIMAL(5,0)))' due to 
data type mismatch: differing types in '(CAST('1' AS BINARY) >= CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 + + +-- !query 999 +SELECT cast('1' as binary) >= cast(1 as decimal(10, 0)) FROM t +-- !query 999 schema +struct<> +-- !query 999 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) >= CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) >= CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 + + +-- !query 1000 +SELECT cast('1' as binary) >= cast(1 as decimal(20, 0)) FROM t +-- !query 1000 schema +struct<> +-- !query 1000 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) >= CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) >= CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 + + +-- !query 1001 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) >= cast(1 as decimal(3, 0)) FROM t +-- !query 1001 schema +struct<> +-- !query 1001 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) >= CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) >= CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 + + +-- !query 1002 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) >= cast(1 as decimal(5, 0)) FROM t +-- !query 1002 schema +struct<> +-- !query 1002 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) >= CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) >= CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 + + +-- !query 1003 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) >= cast(1 as decimal(10, 0)) FROM t +-- !query 1003 schema +struct<> +-- !query 1003 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) >= CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) >= CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 + + +-- !query 1004 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) >= cast(1 as decimal(20, 0)) FROM t +-- !query 1004 schema +struct<> +-- !query 1004 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) >= CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) >= CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 + + +-- !query 1005 +SELECT cast('2017-12-11 09:30:00' as date) >= cast(1 as decimal(3, 0)) FROM t +-- !query 1005 schema +struct<> +-- !query 1005 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) >= CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) >= CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 + + +-- !query 1006 +SELECT cast('2017-12-11 09:30:00' as date) >= cast(1 as decimal(5, 0)) FROM t +-- !query 1006 schema +struct<> +-- !query 1006 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) >= CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) >= CAST(1 AS 
DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 + + +-- !query 1007 +SELECT cast('2017-12-11 09:30:00' as date) >= cast(1 as decimal(10, 0)) FROM t +-- !query 1007 schema +struct<> +-- !query 1007 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) >= CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) >= CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 + + +-- !query 1008 +SELECT cast('2017-12-11 09:30:00' as date) >= cast(1 as decimal(20, 0)) FROM t +-- !query 1008 schema +struct<> +-- !query 1008 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) >= CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) >= CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 + + +-- !query 1009 +SELECT cast(1 as decimal(3, 0)) >= cast(1 as tinyint) FROM t +-- !query 1009 schema +struct<(CAST(1 AS DECIMAL(3,0)) >= CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):boolean> +-- !query 1009 output +true + + +-- !query 1010 +SELECT cast(1 as decimal(5, 0)) >= cast(1 as tinyint) FROM t +-- !query 1010 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)) >= CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> +-- !query 1010 output +true + + +-- !query 1011 +SELECT cast(1 as decimal(10, 0)) >= cast(1 as tinyint) FROM t +-- !query 1011 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) >= CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> +-- !query 1011 output +true + + +-- !query 1012 +SELECT cast(1 as decimal(20, 0)) >= cast(1 as tinyint) FROM t +-- !query 1012 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) >= CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> +-- !query 1012 output +true + + +-- !query 1013 +SELECT cast(1 as decimal(3, 0)) >= cast(1 as smallint) FROM t +-- !query 1013 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)) >= CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> +-- !query 1013 output +true + + +-- !query 1014 +SELECT cast(1 as decimal(5, 0)) >= cast(1 as smallint) FROM t +-- !query 1014 schema +struct<(CAST(1 AS DECIMAL(5,0)) >= CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):boolean> +-- !query 1014 output +true + + +-- !query 1015 +SELECT cast(1 as decimal(10, 0)) >= cast(1 as smallint) FROM t +-- !query 1015 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) >= CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> +-- !query 1015 output +true + + +-- !query 1016 +SELECT cast(1 as decimal(20, 0)) >= cast(1 as smallint) FROM t +-- !query 1016 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) >= CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> +-- !query 1016 output +true + + +-- !query 1017 +SELECT cast(1 as decimal(3, 0)) >= cast(1 as int) FROM t +-- !query 1017 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) >= CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 1017 output +true + + +-- !query 1018 +SELECT cast(1 as decimal(5, 0)) >= cast(1 as int) FROM t +-- !query 1018 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) >= CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 1018 output +true + + +-- !query 1019 +SELECT cast(1 as decimal(10, 0)) 
>= cast(1 as int) FROM t +-- !query 1019 schema +struct<(CAST(1 AS DECIMAL(10,0)) >= CAST(CAST(1 AS INT) AS DECIMAL(10,0))):boolean> +-- !query 1019 output +true + + +-- !query 1020 +SELECT cast(1 as decimal(20, 0)) >= cast(1 as int) FROM t +-- !query 1020 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) >= CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> +-- !query 1020 output +true + + +-- !query 1021 +SELECT cast(1 as decimal(3, 0)) >= cast(1 as bigint) FROM t +-- !query 1021 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)) >= CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 1021 output +true + + +-- !query 1022 +SELECT cast(1 as decimal(5, 0)) >= cast(1 as bigint) FROM t +-- !query 1022 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)) >= CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 1022 output +true + + +-- !query 1023 +SELECT cast(1 as decimal(10, 0)) >= cast(1 as bigint) FROM t +-- !query 1023 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) >= CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> +-- !query 1023 output +true + + +-- !query 1024 +SELECT cast(1 as decimal(20, 0)) >= cast(1 as bigint) FROM t +-- !query 1024 schema +struct<(CAST(1 AS DECIMAL(20,0)) >= CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):boolean> +-- !query 1024 output +true + + +-- !query 1025 +SELECT cast(1 as decimal(3, 0)) >= cast(1 as float) FROM t +-- !query 1025 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) >= CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 1025 output +true + + +-- !query 1026 +SELECT cast(1 as decimal(5, 0)) >= cast(1 as float) FROM t +-- !query 1026 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) >= CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 1026 output +true + + +-- !query 1027 +SELECT cast(1 as decimal(10, 0)) >= cast(1 as float) FROM t +-- !query 1027 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) >= CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 1027 output +true + + +-- !query 1028 +SELECT cast(1 as decimal(20, 0)) >= cast(1 as float) FROM t +-- !query 1028 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) >= CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> +-- !query 1028 output +true + + +-- !query 1029 +SELECT cast(1 as decimal(3, 0)) >= cast(1 as double) FROM t +-- !query 1029 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) >= CAST(1 AS DOUBLE)):boolean> +-- !query 1029 output +true + + +-- !query 1030 +SELECT cast(1 as decimal(5, 0)) >= cast(1 as double) FROM t +-- !query 1030 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) >= CAST(1 AS DOUBLE)):boolean> +-- !query 1030 output +true + + +-- !query 1031 +SELECT cast(1 as decimal(10, 0)) >= cast(1 as double) FROM t +-- !query 1031 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) >= CAST(1 AS DOUBLE)):boolean> +-- !query 1031 output +true + + +-- !query 1032 +SELECT cast(1 as decimal(20, 0)) >= cast(1 as double) FROM t +-- !query 1032 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) >= CAST(1 AS DOUBLE)):boolean> +-- !query 1032 output +true + + +-- !query 1033 +SELECT cast(1 as decimal(3, 0)) >= cast(1 as decimal(10, 0)) FROM t +-- !query 1033 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 1033 output +true + + +-- !query 1034 +SELECT cast(1 as decimal(5, 0)) >= cast(1 as decimal(10, 0)) 
FROM t +-- !query 1034 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> +-- !query 1034 output +true + + +-- !query 1035 +SELECT cast(1 as decimal(10, 0)) >= cast(1 as decimal(10, 0)) FROM t +-- !query 1035 schema +struct<(CAST(1 AS DECIMAL(10,0)) >= CAST(1 AS DECIMAL(10,0))):boolean> +-- !query 1035 output +true + + +-- !query 1036 +SELECT cast(1 as decimal(20, 0)) >= cast(1 as decimal(10, 0)) FROM t +-- !query 1036 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> +-- !query 1036 output +true + + +-- !query 1037 +SELECT cast(1 as decimal(3, 0)) >= cast(1 as string) FROM t +-- !query 1037 schema +struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) >= CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 1037 output +true + + +-- !query 1038 +SELECT cast(1 as decimal(5, 0)) >= cast(1 as string) FROM t +-- !query 1038 schema +struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) >= CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 1038 output +true + + +-- !query 1039 +SELECT cast(1 as decimal(10, 0)) >= cast(1 as string) FROM t +-- !query 1039 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) >= CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 1039 output +true + + +-- !query 1040 +SELECT cast(1 as decimal(20, 0)) >= cast(1 as string) FROM t +-- !query 1040 schema +struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) >= CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> +-- !query 1040 output +true + + +-- !query 1041 +SELECT cast(1 as decimal(3, 0)) >= cast('1' as binary) FROM t +-- !query 1041 schema +struct<> +-- !query 1041 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) >= CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) >= CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 + + +-- !query 1042 +SELECT cast(1 as decimal(5, 0)) >= cast('1' as binary) FROM t +-- !query 1042 schema +struct<> +-- !query 1042 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) >= CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) >= CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 + + +-- !query 1043 +SELECT cast(1 as decimal(10, 0)) >= cast('1' as binary) FROM t +-- !query 1043 schema +struct<> +-- !query 1043 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) >= CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) >= CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 + + +-- !query 1044 +SELECT cast(1 as decimal(20, 0)) >= cast('1' as binary) FROM t +-- !query 1044 schema +struct<> +-- !query 1044 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) >= CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) >= CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 + + +-- !query 1045 +SELECT cast(1 as decimal(3, 0)) >= cast(1 as boolean) FROM t +-- !query 1045 schema +struct<> +-- !query 1045 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) >= CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) >= CAST(1 AS BOOLEAN))' (decimal(3,0) and boolean).; line 1 pos 7 + + +-- !query 1046 +SELECT cast(1 as decimal(5, 0)) >= 
cast(1 as boolean) FROM t +-- !query 1046 schema +struct<> +-- !query 1046 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) >= CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) >= CAST(1 AS BOOLEAN))' (decimal(5,0) and boolean).; line 1 pos 7 + + +-- !query 1047 +SELECT cast(1 as decimal(10, 0)) >= cast(1 as boolean) FROM t +-- !query 1047 schema +struct<> +-- !query 1047 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) >= CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) >= CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 + + +-- !query 1048 +SELECT cast(1 as decimal(20, 0)) >= cast(1 as boolean) FROM t +-- !query 1048 schema +struct<> +-- !query 1048 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) >= CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) >= CAST(1 AS BOOLEAN))' (decimal(20,0) and boolean).; line 1 pos 7 + + +-- !query 1049 +SELECT cast(1 as decimal(3, 0)) >= cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 1049 schema +struct<> +-- !query 1049 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) >= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) >= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 + + +-- !query 1050 +SELECT cast(1 as decimal(5, 0)) >= cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 1050 schema +struct<> +-- !query 1050 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) >= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) >= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 + + +-- !query 1051 +SELECT cast(1 as decimal(10, 0)) >= cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 1051 schema +struct<> +-- !query 1051 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) >= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) >= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 + + +-- !query 1052 +SELECT cast(1 as decimal(20, 0)) >= cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 1052 schema +struct<> +-- !query 1052 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) >= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) >= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 + + +-- !query 1053 +SELECT cast(1 as decimal(3, 0)) >= cast('2017-12-11 09:30:00' as date) FROM t +-- !query 1053 schema +struct<> +-- !query 1053 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) >= CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) >= CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 + + +-- !query 1054 +SELECT cast(1 as decimal(5, 0)) >= cast('2017-12-11 09:30:00' as date) FROM t +-- !query 1054 schema +struct<> +-- !query 1054 output +org.apache.spark.sql.AnalysisException 
+cannot resolve '(CAST(1 AS DECIMAL(5,0)) >= CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) >= CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 + + +-- !query 1055 +SELECT cast(1 as decimal(10, 0)) >= cast('2017-12-11 09:30:00' as date) FROM t +-- !query 1055 schema +struct<> +-- !query 1055 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) >= CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) >= CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 + + +-- !query 1056 +SELECT cast(1 as decimal(20, 0)) >= cast('2017-12-11 09:30:00' as date) FROM t +-- !query 1056 schema +struct<> +-- !query 1056 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) >= CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) >= CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 + + +-- !query 1057 +SELECT cast(1 as tinyint) <> cast(1 as decimal(3, 0)) FROM t +-- !query 1057 schema +struct<(NOT (CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) = CAST(1 AS DECIMAL(3,0)))):boolean> +-- !query 1057 output +false + + +-- !query 1058 +SELECT cast(1 as tinyint) <> cast(1 as decimal(5, 0)) FROM t +-- !query 1058 schema +struct<(NOT (CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)) = CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)))):boolean> +-- !query 1058 output +false + + +-- !query 1059 +SELECT cast(1 as tinyint) <> cast(1 as decimal(10, 0)) FROM t +-- !query 1059 schema +struct<(NOT (CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> +-- !query 1059 output +false + + +-- !query 1060 +SELECT cast(1 as tinyint) <> cast(1 as decimal(20, 0)) FROM t +-- !query 1060 schema +struct<(NOT (CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)))):boolean> +-- !query 1060 output +false + + +-- !query 1061 +SELECT cast(1 as smallint) <> cast(1 as decimal(3, 0)) FROM t +-- !query 1061 schema +struct<(NOT (CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)) = CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)))):boolean> +-- !query 1061 output +false + + +-- !query 1062 +SELECT cast(1 as smallint) <> cast(1 as decimal(5, 0)) FROM t +-- !query 1062 schema +struct<(NOT (CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) = CAST(1 AS DECIMAL(5,0)))):boolean> +-- !query 1062 output +false + + +-- !query 1063 +SELECT cast(1 as smallint) <> cast(1 as decimal(10, 0)) FROM t +-- !query 1063 schema +struct<(NOT (CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> +-- !query 1063 output +false + + +-- !query 1064 +SELECT cast(1 as smallint) <> cast(1 as decimal(20, 0)) FROM t +-- !query 1064 schema +struct<(NOT (CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)))):boolean> +-- !query 1064 output +false + + +-- !query 1065 +SELECT cast(1 as int) <> cast(1 as decimal(3, 0)) FROM t +-- !query 1065 schema +struct<(NOT (CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)))):boolean> +-- !query 1065 output +false + + +-- !query 1066 +SELECT cast(1 as int) <> cast(1 as decimal(5, 0)) FROM t +-- 
!query 1066 schema +struct<(NOT (CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)))):boolean> +-- !query 1066 output +false + + +-- !query 1067 +SELECT cast(1 as int) <> cast(1 as decimal(10, 0)) FROM t +-- !query 1067 schema +struct<(NOT (CAST(CAST(1 AS INT) AS DECIMAL(10,0)) = CAST(1 AS DECIMAL(10,0)))):boolean> +-- !query 1067 output +false + + +-- !query 1068 +SELECT cast(1 as int) <> cast(1 as decimal(20, 0)) FROM t +-- !query 1068 schema +struct<(NOT (CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)))):boolean> +-- !query 1068 output +false + + +-- !query 1069 +SELECT cast(1 as bigint) <> cast(1 as decimal(3, 0)) FROM t +-- !query 1069 schema +struct<(NOT (CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)))):boolean> +-- !query 1069 output +false + + +-- !query 1070 +SELECT cast(1 as bigint) <> cast(1 as decimal(5, 0)) FROM t +-- !query 1070 schema +struct<(NOT (CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)))):boolean> +-- !query 1070 output +false + + +-- !query 1071 +SELECT cast(1 as bigint) <> cast(1 as decimal(10, 0)) FROM t +-- !query 1071 schema +struct<(NOT (CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)))):boolean> +-- !query 1071 output +false + + +-- !query 1072 +SELECT cast(1 as bigint) <> cast(1 as decimal(20, 0)) FROM t +-- !query 1072 schema +struct<(NOT (CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) = CAST(1 AS DECIMAL(20,0)))):boolean> +-- !query 1072 output +false + + +-- !query 1073 +SELECT cast(1 as float) <> cast(1 as decimal(3, 0)) FROM t +-- !query 1073 schema +struct<(NOT (CAST(CAST(1 AS FLOAT) AS DOUBLE) = CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE))):boolean> +-- !query 1073 output +false + + +-- !query 1074 +SELECT cast(1 as float) <> cast(1 as decimal(5, 0)) FROM t +-- !query 1074 schema +struct<(NOT (CAST(CAST(1 AS FLOAT) AS DOUBLE) = CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE))):boolean> +-- !query 1074 output +false + + +-- !query 1075 +SELECT cast(1 as float) <> cast(1 as decimal(10, 0)) FROM t +-- !query 1075 schema +struct<(NOT (CAST(CAST(1 AS FLOAT) AS DOUBLE) = CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> +-- !query 1075 output +false + + +-- !query 1076 +SELECT cast(1 as float) <> cast(1 as decimal(20, 0)) FROM t +-- !query 1076 schema +struct<(NOT (CAST(CAST(1 AS FLOAT) AS DOUBLE) = CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE))):boolean> +-- !query 1076 output +false + + +-- !query 1077 +SELECT cast(1 as double) <> cast(1 as decimal(3, 0)) FROM t +-- !query 1077 schema +struct<(NOT (CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE))):boolean> +-- !query 1077 output +false + + +-- !query 1078 +SELECT cast(1 as double) <> cast(1 as decimal(5, 0)) FROM t +-- !query 1078 schema +struct<(NOT (CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE))):boolean> +-- !query 1078 output +false + + +-- !query 1079 +SELECT cast(1 as double) <> cast(1 as decimal(10, 0)) FROM t +-- !query 1079 schema +struct<(NOT (CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> +-- !query 1079 output +false + + +-- !query 1080 +SELECT cast(1 as double) <> cast(1 as decimal(20, 0)) FROM t +-- !query 1080 schema +struct<(NOT (CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE))):boolean> +-- !query 1080 output +false + + +-- !query 
1081 +SELECT cast(1 as decimal(10, 0)) <> cast(1 as decimal(3, 0)) FROM t +-- !query 1081 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)))):boolean> +-- !query 1081 output +false + + +-- !query 1082 +SELECT cast(1 as decimal(10, 0)) <> cast(1 as decimal(5, 0)) FROM t +-- !query 1082 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)))):boolean> +-- !query 1082 output +false + + +-- !query 1083 +SELECT cast(1 as decimal(10, 0)) <> cast(1 as decimal(10, 0)) FROM t +-- !query 1083 schema +struct<(NOT (CAST(1 AS DECIMAL(10,0)) = CAST(1 AS DECIMAL(10,0)))):boolean> +-- !query 1083 output +false + + +-- !query 1084 +SELECT cast(1 as decimal(10, 0)) <> cast(1 as decimal(20, 0)) FROM t +-- !query 1084 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)))):boolean> +-- !query 1084 output +false + + +-- !query 1085 +SELECT cast('1' as binary) <> cast(1 as decimal(3, 0)) FROM t +-- !query 1085 schema +struct<> +-- !query 1085 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 + + +-- !query 1086 +SELECT cast('1' as binary) <> cast(1 as decimal(5, 0)) FROM t +-- !query 1086 schema +struct<> +-- !query 1086 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 + + +-- !query 1087 +SELECT cast('1' as binary) <> cast(1 as decimal(10, 0)) FROM t +-- !query 1087 schema +struct<> +-- !query 1087 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 + + +-- !query 1088 +SELECT cast('1' as binary) <> cast(1 as decimal(20, 0)) FROM t +-- !query 1088 schema +struct<> +-- !query 1088 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 + + +-- !query 1089 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <> cast(1 as decimal(3, 0)) FROM t +-- !query 1089 schema +struct<> +-- !query 1089 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 + + +-- !query 1090 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <> cast(1 as decimal(5, 0)) FROM t +-- !query 1090 schema +struct<> +-- !query 1090 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 + + +-- !query 1091 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <> cast(1 as decimal(10, 0)) FROM t +-- 
!query 1091 schema +struct<> +-- !query 1091 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 + + +-- !query 1092 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <> cast(1 as decimal(20, 0)) FROM t +-- !query 1092 schema +struct<> +-- !query 1092 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 + + +-- !query 1093 +SELECT cast('2017-12-11 09:30:00' as date) <> cast(1 as decimal(3, 0)) FROM t +-- !query 1093 schema +struct<> +-- !query 1093 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 + + +-- !query 1094 +SELECT cast('2017-12-11 09:30:00' as date) <> cast(1 as decimal(5, 0)) FROM t +-- !query 1094 schema +struct<> +-- !query 1094 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 + + +-- !query 1095 +SELECT cast('2017-12-11 09:30:00' as date) <> cast(1 as decimal(10, 0)) FROM t +-- !query 1095 schema +struct<> +-- !query 1095 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 + + +-- !query 1096 +SELECT cast('2017-12-11 09:30:00' as date) <> cast(1 as decimal(20, 0)) FROM t +-- !query 1096 schema +struct<> +-- !query 1096 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 + + +-- !query 1097 +SELECT cast(1 as decimal(3, 0)) <> cast(1 as tinyint) FROM t +-- !query 1097 schema +struct<(NOT (CAST(1 AS DECIMAL(3,0)) = CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)))):boolean> +-- !query 1097 output +false + + +-- !query 1098 +SELECT cast(1 as decimal(5, 0)) <> cast(1 as tinyint) FROM t +-- !query 1098 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)) = CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)))):boolean> +-- !query 1098 output +false + + +-- !query 1099 +SELECT cast(1 as decimal(10, 0)) <> cast(1 as tinyint) FROM t +-- !query 1099 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)))):boolean> +-- !query 1099 output +false + + +-- !query 1100 +SELECT cast(1 as decimal(20, 0)) <> cast(1 as tinyint) FROM t +-- !query 1100 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)))):boolean> +-- !query 
1100 output +false + + +-- !query 1101 +SELECT cast(1 as decimal(3, 0)) <> cast(1 as smallint) FROM t +-- !query 1101 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)) = CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)))):boolean> +-- !query 1101 output +false + + +-- !query 1102 +SELECT cast(1 as decimal(5, 0)) <> cast(1 as smallint) FROM t +-- !query 1102 schema +struct<(NOT (CAST(1 AS DECIMAL(5,0)) = CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)))):boolean> +-- !query 1102 output +false + + +-- !query 1103 +SELECT cast(1 as decimal(10, 0)) <> cast(1 as smallint) FROM t +-- !query 1103 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)))):boolean> +-- !query 1103 output +false + + +-- !query 1104 +SELECT cast(1 as decimal(20, 0)) <> cast(1 as smallint) FROM t +-- !query 1104 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)))):boolean> +-- !query 1104 output +false + + +-- !query 1105 +SELECT cast(1 as decimal(3, 0)) <> cast(1 as int) FROM t +-- !query 1105 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) = CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> +-- !query 1105 output +false + + +-- !query 1106 +SELECT cast(1 as decimal(5, 0)) <> cast(1 as int) FROM t +-- !query 1106 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) = CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> +-- !query 1106 output +false + + +-- !query 1107 +SELECT cast(1 as decimal(10, 0)) <> cast(1 as int) FROM t +-- !query 1107 schema +struct<(NOT (CAST(1 AS DECIMAL(10,0)) = CAST(CAST(1 AS INT) AS DECIMAL(10,0)))):boolean> +-- !query 1107 output +false + + +-- !query 1108 +SELECT cast(1 as decimal(20, 0)) <> cast(1 as int) FROM t +-- !query 1108 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)))):boolean> +-- !query 1108 output +false + + +-- !query 1109 +SELECT cast(1 as decimal(3, 0)) <> cast(1 as bigint) FROM t +-- !query 1109 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)))):boolean> +-- !query 1109 output +false + + +-- !query 1110 +SELECT cast(1 as decimal(5, 0)) <> cast(1 as bigint) FROM t +-- !query 1110 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)))):boolean> +-- !query 1110 output +false + + +-- !query 1111 +SELECT cast(1 as decimal(10, 0)) <> cast(1 as bigint) FROM t +-- !query 1111 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)))):boolean> +-- !query 1111 output +false + + +-- !query 1112 +SELECT cast(1 as decimal(20, 0)) <> cast(1 as bigint) FROM t +-- !query 1112 schema +struct<(NOT (CAST(1 AS DECIMAL(20,0)) = CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)))):boolean> +-- !query 1112 output +false + + +-- !query 1113 +SELECT cast(1 as decimal(3, 0)) <> cast(1 as float) FROM t +-- !query 1113 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) = CAST(CAST(1 AS FLOAT) AS DOUBLE))):boolean> +-- !query 1113 output +false + + +-- !query 1114 +SELECT cast(1 as decimal(5, 0)) <> cast(1 as float) FROM t +-- !query 1114 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) = CAST(CAST(1 AS 
FLOAT) AS DOUBLE))):boolean> +-- !query 1114 output +false + + +-- !query 1115 +SELECT cast(1 as decimal(10, 0)) <> cast(1 as float) FROM t +-- !query 1115 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(CAST(1 AS FLOAT) AS DOUBLE))):boolean> +-- !query 1115 output +false + + +-- !query 1116 +SELECT cast(1 as decimal(20, 0)) <> cast(1 as float) FROM t +-- !query 1116 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) = CAST(CAST(1 AS FLOAT) AS DOUBLE))):boolean> +-- !query 1116 output +false + + +-- !query 1117 +SELECT cast(1 as decimal(3, 0)) <> cast(1 as double) FROM t +-- !query 1117 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) = CAST(1 AS DOUBLE))):boolean> +-- !query 1117 output +false + + +-- !query 1118 +SELECT cast(1 as decimal(5, 0)) <> cast(1 as double) FROM t +-- !query 1118 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) = CAST(1 AS DOUBLE))):boolean> +-- !query 1118 output +false + + +-- !query 1119 +SELECT cast(1 as decimal(10, 0)) <> cast(1 as double) FROM t +-- !query 1119 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(1 AS DOUBLE))):boolean> +-- !query 1119 output +false + + +-- !query 1120 +SELECT cast(1 as decimal(20, 0)) <> cast(1 as double) FROM t +-- !query 1120 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) = CAST(1 AS DOUBLE))):boolean> +-- !query 1120 output +false + + +-- !query 1121 +SELECT cast(1 as decimal(3, 0)) <> cast(1 as decimal(10, 0)) FROM t +-- !query 1121 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> +-- !query 1121 output +false + + +-- !query 1122 +SELECT cast(1 as decimal(5, 0)) <> cast(1 as decimal(10, 0)) FROM t +-- !query 1122 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> +-- !query 1122 output +false + + +-- !query 1123 +SELECT cast(1 as decimal(10, 0)) <> cast(1 as decimal(10, 0)) FROM t +-- !query 1123 schema +struct<(NOT (CAST(1 AS DECIMAL(10,0)) = CAST(1 AS DECIMAL(10,0)))):boolean> +-- !query 1123 output +false + + +-- !query 1124 +SELECT cast(1 as decimal(20, 0)) <> cast(1 as decimal(10, 0)) FROM t +-- !query 1124 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)))):boolean> +-- !query 1124 output +false + + +-- !query 1125 +SELECT cast(1 as decimal(3, 0)) <> cast(1 as string) FROM t +-- !query 1125 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) = CAST(CAST(1 AS STRING) AS DOUBLE))):boolean> +-- !query 1125 output +false + + +-- !query 1126 +SELECT cast(1 as decimal(5, 0)) <> cast(1 as string) FROM t +-- !query 1126 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) = CAST(CAST(1 AS STRING) AS DOUBLE))):boolean> +-- !query 1126 output +false + + +-- !query 1127 +SELECT cast(1 as decimal(10, 0)) <> cast(1 as string) FROM t +-- !query 1127 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(CAST(1 AS STRING) AS DOUBLE))):boolean> +-- !query 1127 output +false + + +-- !query 1128 +SELECT cast(1 as decimal(20, 0)) <> cast(1 as string) FROM t +-- !query 1128 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) = CAST(CAST(1 AS STRING) AS DOUBLE))):boolean> +-- !query 1128 output +false + + +-- !query 1129 +SELECT cast(1 as decimal(3, 0)) <> cast('1' as binary) FROM t +-- !query 1129 schema +struct<> +-- !query 1129 output +org.apache.spark.sql.AnalysisException +cannot 
resolve '(CAST(1 AS DECIMAL(3,0)) = CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) = CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 + + +-- !query 1130 +SELECT cast(1 as decimal(5, 0)) <> cast('1' as binary) FROM t +-- !query 1130 schema +struct<> +-- !query 1130 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) = CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) = CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 + + +-- !query 1131 +SELECT cast(1 as decimal(10, 0)) <> cast('1' as binary) FROM t +-- !query 1131 schema +struct<> +-- !query 1131 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) = CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) = CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 + + +-- !query 1132 +SELECT cast(1 as decimal(20, 0)) <> cast('1' as binary) FROM t +-- !query 1132 schema +struct<> +-- !query 1132 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) = CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) = CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 + + +-- !query 1133 +SELECT cast(1 as decimal(3, 0)) <> cast(1 as boolean) FROM t +-- !query 1133 schema +struct<(NOT (CAST(1 AS DECIMAL(3,0)) = CAST(CAST(1 AS BOOLEAN) AS DECIMAL(3,0)))):boolean> +-- !query 1133 output +false + + +-- !query 1134 +SELECT cast(1 as decimal(5, 0)) <> cast(1 as boolean) FROM t +-- !query 1134 schema +struct<(NOT (CAST(1 AS DECIMAL(5,0)) = CAST(CAST(1 AS BOOLEAN) AS DECIMAL(5,0)))):boolean> +-- !query 1134 output +false + + +-- !query 1135 +SELECT cast(1 as decimal(10, 0)) <> cast(1 as boolean) FROM t +-- !query 1135 schema +struct<(NOT (CAST(1 AS DECIMAL(10,0)) = CAST(CAST(1 AS BOOLEAN) AS DECIMAL(10,0)))):boolean> +-- !query 1135 output +false + + +-- !query 1136 +SELECT cast(1 as decimal(20, 0)) <> cast(1 as boolean) FROM t +-- !query 1136 schema +struct<(NOT (CAST(1 AS DECIMAL(20,0)) = CAST(CAST(1 AS BOOLEAN) AS DECIMAL(20,0)))):boolean> +-- !query 1136 output +false + + +-- !query 1137 +SELECT cast(1 as decimal(3, 0)) <> cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 1137 schema +struct<> +-- !query 1137 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 + + +-- !query 1138 +SELECT cast(1 as decimal(5, 0)) <> cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 1138 schema +struct<> +-- !query 1138 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 + + +-- !query 1139 +SELECT cast(1 as decimal(10, 0)) <> cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 1139 schema +struct<> +-- !query 1139 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) = 
CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 + + +-- !query 1140 +SELECT cast(1 as decimal(20, 0)) <> cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 1140 schema +struct<> +-- !query 1140 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 + + +-- !query 1141 +SELECT cast(1 as decimal(3, 0)) <> cast('2017-12-11 09:30:00' as date) FROM t +-- !query 1141 schema +struct<> +-- !query 1141 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(3,0)) = CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) = CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 + + +-- !query 1142 +SELECT cast(1 as decimal(5, 0)) <> cast('2017-12-11 09:30:00' as date) FROM t +-- !query 1142 schema +struct<> +-- !query 1142 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(5,0)) = CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) = CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 + + +-- !query 1143 +SELECT cast(1 as decimal(10, 0)) <> cast('2017-12-11 09:30:00' as date) FROM t +-- !query 1143 schema +struct<> +-- !query 1143 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) = CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) = CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 + + +-- !query 1144 +SELECT cast(1 as decimal(20, 0)) <> cast('2017-12-11 09:30:00' as date) FROM t +-- !query 1144 schema +struct<> +-- !query 1144 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(20,0)) = CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) = CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/division.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/division.sql.out new file mode 100644 index 0000000000000..017e0fea30e90 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/division.sql.out @@ -0,0 +1,1242 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 145 + + +-- !query 0 +CREATE TEMPORARY VIEW t AS SELECT 1 +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +SELECT cast(1 as tinyint) / cast(1 as tinyint) FROM t +-- !query 1 schema +struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) / CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> +-- !query 1 output +1.0 + + +-- !query 2 +SELECT cast(1 as tinyint) / cast(1 as smallint) FROM t +-- !query 2 schema +struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) / CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> +-- !query 2 output +1.0 + + +-- !query 3 +SELECT cast(1 as tinyint) / cast(1 as int) FROM t +-- !query 3 schema +struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) / CAST(CAST(1 AS INT) AS DOUBLE)):double> +-- !query 3 output +1.0 + + +-- !query 4 +SELECT cast(1 as tinyint) / cast(1 as bigint) FROM t +-- !query 4 schema +struct<(CAST(CAST(1 AS TINYINT) AS 
DOUBLE) / CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> +-- !query 4 output +1.0 + + +-- !query 5 +SELECT cast(1 as tinyint) / cast(1 as float) FROM t +-- !query 5 schema +struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 5 output +1.0 + + +-- !query 6 +SELECT cast(1 as tinyint) / cast(1 as double) FROM t +-- !query 6 schema +struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) / CAST(CAST(1 AS DOUBLE) AS DOUBLE)):double> +-- !query 6 output +1.0 + + +-- !query 7 +SELECT cast(1 as tinyint) / cast(1 as decimal(10, 0)) FROM t +-- !query 7 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) / CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(14,11)> +-- !query 7 output +1 + + +-- !query 8 +SELECT cast(1 as tinyint) / cast(1 as string) FROM t +-- !query 8 schema +struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) / CAST(CAST(CAST(1 AS STRING) AS DOUBLE) AS DOUBLE)):double> +-- !query 8 output +1.0 + + +-- !query 9 +SELECT cast(1 as tinyint) / cast('1' as binary) FROM t +-- !query 9 schema +struct<> +-- !query 9 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS TINYINT) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS TINYINT) / CAST('1' AS BINARY))' (tinyint and binary).; line 1 pos 7 + + +-- !query 10 +SELECT cast(1 as tinyint) / cast(1 as boolean) FROM t +-- !query 10 schema +struct<> +-- !query 10 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS TINYINT) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS TINYINT) / CAST(1 AS BOOLEAN))' (tinyint and boolean).; line 1 pos 7 + + +-- !query 11 +SELECT cast(1 as tinyint) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 11 schema +struct<> +-- !query 11 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS TINYINT) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS TINYINT) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (tinyint and timestamp).; line 1 pos 7 + + +-- !query 12 +SELECT cast(1 as tinyint) / cast('2017-12-11 09:30:00' as date) FROM t +-- !query 12 schema +struct<> +-- !query 12 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS TINYINT) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS TINYINT) / CAST('2017-12-11 09:30:00' AS DATE))' (tinyint and date).; line 1 pos 7 + + +-- !query 13 +SELECT cast(1 as smallint) / cast(1 as tinyint) FROM t +-- !query 13 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) / CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> +-- !query 13 output +1.0 + + +-- !query 14 +SELECT cast(1 as smallint) / cast(1 as smallint) FROM t +-- !query 14 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) / CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> +-- !query 14 output +1.0 + + +-- !query 15 +SELECT cast(1 as smallint) / cast(1 as int) FROM t +-- !query 15 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) / CAST(CAST(1 AS INT) AS DOUBLE)):double> +-- !query 15 output +1.0 + + +-- !query 16 +SELECT cast(1 as smallint) / cast(1 as bigint) FROM t +-- !query 16 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) / CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> +-- !query 16 output +1.0 + + +-- !query 17 +SELECT cast(1 as smallint) / cast(1 as float) FROM t +-- !query 17 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 17 output +1.0 + + +-- 
!query 18 +SELECT cast(1 as smallint) / cast(1 as double) FROM t +-- !query 18 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) / CAST(CAST(1 AS DOUBLE) AS DOUBLE)):double> +-- !query 18 output +1.0 + + +-- !query 19 +SELECT cast(1 as smallint) / cast(1 as decimal(10, 0)) FROM t +-- !query 19 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) / CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(16,11)> +-- !query 19 output +1 + + +-- !query 20 +SELECT cast(1 as smallint) / cast(1 as string) FROM t +-- !query 20 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) / CAST(CAST(CAST(1 AS STRING) AS DOUBLE) AS DOUBLE)):double> +-- !query 20 output +1.0 + + +-- !query 21 +SELECT cast(1 as smallint) / cast('1' as binary) FROM t +-- !query 21 schema +struct<> +-- !query 21 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS SMALLINT) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS SMALLINT) / CAST('1' AS BINARY))' (smallint and binary).; line 1 pos 7 + + +-- !query 22 +SELECT cast(1 as smallint) / cast(1 as boolean) FROM t +-- !query 22 schema +struct<> +-- !query 22 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS SMALLINT) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS SMALLINT) / CAST(1 AS BOOLEAN))' (smallint and boolean).; line 1 pos 7 + + +-- !query 23 +SELECT cast(1 as smallint) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 23 schema +struct<> +-- !query 23 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS SMALLINT) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS SMALLINT) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (smallint and timestamp).; line 1 pos 7 + + +-- !query 24 +SELECT cast(1 as smallint) / cast('2017-12-11 09:30:00' as date) FROM t +-- !query 24 schema +struct<> +-- !query 24 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS SMALLINT) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS SMALLINT) / CAST('2017-12-11 09:30:00' AS DATE))' (smallint and date).; line 1 pos 7 + + +-- !query 25 +SELECT cast(1 as int) / cast(1 as tinyint) FROM t +-- !query 25 schema +struct<(CAST(CAST(1 AS INT) AS DOUBLE) / CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> +-- !query 25 output +1.0 + + +-- !query 26 +SELECT cast(1 as int) / cast(1 as smallint) FROM t +-- !query 26 schema +struct<(CAST(CAST(1 AS INT) AS DOUBLE) / CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> +-- !query 26 output +1.0 + + +-- !query 27 +SELECT cast(1 as int) / cast(1 as int) FROM t +-- !query 27 schema +struct<(CAST(CAST(1 AS INT) AS DOUBLE) / CAST(CAST(1 AS INT) AS DOUBLE)):double> +-- !query 27 output +1.0 + + +-- !query 28 +SELECT cast(1 as int) / cast(1 as bigint) FROM t +-- !query 28 schema +struct<(CAST(CAST(1 AS INT) AS DOUBLE) / CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> +-- !query 28 output +1.0 + + +-- !query 29 +SELECT cast(1 as int) / cast(1 as float) FROM t +-- !query 29 schema +struct<(CAST(CAST(1 AS INT) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 29 output +1.0 + + +-- !query 30 +SELECT cast(1 as int) / cast(1 as double) FROM t +-- !query 30 schema +struct<(CAST(CAST(1 AS INT) AS DOUBLE) / CAST(CAST(1 AS DOUBLE) AS DOUBLE)):double> +-- !query 30 output +1.0 + + +-- !query 31 +SELECT cast(1 as int) / cast(1 as decimal(10, 0)) FROM t +-- !query 31 schema 
+struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) / CAST(1 AS DECIMAL(10,0))):decimal(21,11)> +-- !query 31 output +1 + + +-- !query 32 +SELECT cast(1 as int) / cast(1 as string) FROM t +-- !query 32 schema +struct<(CAST(CAST(1 AS INT) AS DOUBLE) / CAST(CAST(CAST(1 AS STRING) AS DOUBLE) AS DOUBLE)):double> +-- !query 32 output +1.0 + + +-- !query 33 +SELECT cast(1 as int) / cast('1' as binary) FROM t +-- !query 33 schema +struct<> +-- !query 33 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS INT) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS INT) / CAST('1' AS BINARY))' (int and binary).; line 1 pos 7 + + +-- !query 34 +SELECT cast(1 as int) / cast(1 as boolean) FROM t +-- !query 34 schema +struct<> +-- !query 34 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS INT) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS INT) / CAST(1 AS BOOLEAN))' (int and boolean).; line 1 pos 7 + + +-- !query 35 +SELECT cast(1 as int) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 35 schema +struct<> +-- !query 35 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS INT) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS INT) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (int and timestamp).; line 1 pos 7 + + +-- !query 36 +SELECT cast(1 as int) / cast('2017-12-11 09:30:00' as date) FROM t +-- !query 36 schema +struct<> +-- !query 36 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS INT) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS INT) / CAST('2017-12-11 09:30:00' AS DATE))' (int and date).; line 1 pos 7 + + +-- !query 37 +SELECT cast(1 as bigint) / cast(1 as tinyint) FROM t +-- !query 37 schema +struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) / CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> +-- !query 37 output +1.0 + + +-- !query 38 +SELECT cast(1 as bigint) / cast(1 as smallint) FROM t +-- !query 38 schema +struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) / CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> +-- !query 38 output +1.0 + + +-- !query 39 +SELECT cast(1 as bigint) / cast(1 as int) FROM t +-- !query 39 schema +struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) / CAST(CAST(1 AS INT) AS DOUBLE)):double> +-- !query 39 output +1.0 + + +-- !query 40 +SELECT cast(1 as bigint) / cast(1 as bigint) FROM t +-- !query 40 schema +struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) / CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> +-- !query 40 output +1.0 + + +-- !query 41 +SELECT cast(1 as bigint) / cast(1 as float) FROM t +-- !query 41 schema +struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 41 output +1.0 + + +-- !query 42 +SELECT cast(1 as bigint) / cast(1 as double) FROM t +-- !query 42 schema +struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) / CAST(CAST(1 AS DOUBLE) AS DOUBLE)):double> +-- !query 42 output +1.0 + + +-- !query 43 +SELECT cast(1 as bigint) / cast(1 as decimal(10, 0)) FROM t +-- !query 43 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) / CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):decimal(31,11)> +-- !query 43 output +1 + + +-- !query 44 +SELECT cast(1 as bigint) / cast(1 as string) FROM t +-- !query 44 schema +struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) / CAST(CAST(CAST(1 AS STRING) AS DOUBLE) AS DOUBLE)):double> +-- !query 44 output +1.0 + + +-- !query 45 +SELECT cast(1 as 
bigint) / cast('1' as binary) FROM t +-- !query 45 schema +struct<> +-- !query 45 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BIGINT) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS BIGINT) / CAST('1' AS BINARY))' (bigint and binary).; line 1 pos 7 + + +-- !query 46 +SELECT cast(1 as bigint) / cast(1 as boolean) FROM t +-- !query 46 schema +struct<> +-- !query 46 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BIGINT) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS BIGINT) / CAST(1 AS BOOLEAN))' (bigint and boolean).; line 1 pos 7 + + +-- !query 47 +SELECT cast(1 as bigint) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 47 schema +struct<> +-- !query 47 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BIGINT) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS BIGINT) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (bigint and timestamp).; line 1 pos 7 + + +-- !query 48 +SELECT cast(1 as bigint) / cast('2017-12-11 09:30:00' as date) FROM t +-- !query 48 schema +struct<> +-- !query 48 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BIGINT) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS BIGINT) / CAST('2017-12-11 09:30:00' AS DATE))' (bigint and date).; line 1 pos 7 + + +-- !query 49 +SELECT cast(1 as float) / cast(1 as tinyint) FROM t +-- !query 49 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> +-- !query 49 output +1.0 + + +-- !query 50 +SELECT cast(1 as float) / cast(1 as smallint) FROM t +-- !query 50 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> +-- !query 50 output +1.0 + + +-- !query 51 +SELECT cast(1 as float) / cast(1 as int) FROM t +-- !query 51 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(1 AS INT) AS DOUBLE)):double> +-- !query 51 output +1.0 + + +-- !query 52 +SELECT cast(1 as float) / cast(1 as bigint) FROM t +-- !query 52 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> +-- !query 52 output +1.0 + + +-- !query 53 +SELECT cast(1 as float) / cast(1 as float) FROM t +-- !query 53 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 53 output +1.0 + + +-- !query 54 +SELECT cast(1 as float) / cast(1 as double) FROM t +-- !query 54 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(1 AS DOUBLE) AS DOUBLE)):double> +-- !query 54 output +1.0 + + +-- !query 55 +SELECT cast(1 as float) / cast(1 as decimal(10, 0)) FROM t +-- !query 55 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) AS DOUBLE)):double> +-- !query 55 output +1.0 + + +-- !query 56 +SELECT cast(1 as float) / cast(1 as string) FROM t +-- !query 56 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(CAST(1 AS STRING) AS DOUBLE) AS DOUBLE)):double> +-- !query 56 output +1.0 + + +-- !query 57 +SELECT cast(1 as float) / cast('1' as binary) FROM t +-- !query 57 schema +struct<> +-- !query 57 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS FLOAT) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS FLOAT) / CAST('1' AS BINARY))' (float and binary).; line 1 pos 7 + + +-- !query 58 +SELECT cast(1 as float) / cast(1 
as boolean) FROM t +-- !query 58 schema +struct<> +-- !query 58 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS FLOAT) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS FLOAT) / CAST(1 AS BOOLEAN))' (float and boolean).; line 1 pos 7 + + +-- !query 59 +SELECT cast(1 as float) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 59 schema +struct<> +-- !query 59 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS FLOAT) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS FLOAT) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (float and timestamp).; line 1 pos 7 + + +-- !query 60 +SELECT cast(1 as float) / cast('2017-12-11 09:30:00' as date) FROM t +-- !query 60 schema +struct<> +-- !query 60 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS FLOAT) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS FLOAT) / CAST('2017-12-11 09:30:00' AS DATE))' (float and date).; line 1 pos 7 + + +-- !query 61 +SELECT cast(1 as double) / cast(1 as tinyint) FROM t +-- !query 61 schema +struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> +-- !query 61 output +1.0 + + +-- !query 62 +SELECT cast(1 as double) / cast(1 as smallint) FROM t +-- !query 62 schema +struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> +-- !query 62 output +1.0 + + +-- !query 63 +SELECT cast(1 as double) / cast(1 as int) FROM t +-- !query 63 schema +struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS INT) AS DOUBLE)):double> +-- !query 63 output +1.0 + + +-- !query 64 +SELECT cast(1 as double) / cast(1 as bigint) FROM t +-- !query 64 schema +struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> +-- !query 64 output +1.0 + + +-- !query 65 +SELECT cast(1 as double) / cast(1 as float) FROM t +-- !query 65 schema +struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 65 output +1.0 + + +-- !query 66 +SELECT cast(1 as double) / cast(1 as double) FROM t +-- !query 66 schema +struct<(CAST(1 AS DOUBLE) / CAST(1 AS DOUBLE)):double> +-- !query 66 output +1.0 + + +-- !query 67 +SELECT cast(1 as double) / cast(1 as decimal(10, 0)) FROM t +-- !query 67 schema +struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> +-- !query 67 output +1.0 + + +-- !query 68 +SELECT cast(1 as double) / cast(1 as string) FROM t +-- !query 68 schema +struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 68 output +1.0 + + +-- !query 69 +SELECT cast(1 as double) / cast('1' as binary) FROM t +-- !query 69 schema +struct<> +-- !query 69 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DOUBLE) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DOUBLE) / CAST('1' AS BINARY))' (double and binary).; line 1 pos 7 + + +-- !query 70 +SELECT cast(1 as double) / cast(1 as boolean) FROM t +-- !query 70 schema +struct<> +-- !query 70 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DOUBLE) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DOUBLE) / CAST(1 AS BOOLEAN))' (double and boolean).; line 1 pos 7 + + +-- !query 71 +SELECT cast(1 as double) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 71 schema +struct<> +-- !query 71 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DOUBLE) / 
CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DOUBLE) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (double and timestamp).; line 1 pos 7 + + +-- !query 72 +SELECT cast(1 as double) / cast('2017-12-11 09:30:00' as date) FROM t +-- !query 72 schema +struct<> +-- !query 72 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DOUBLE) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DOUBLE) / CAST('2017-12-11 09:30:00' AS DATE))' (double and date).; line 1 pos 7 + + +-- !query 73 +SELECT cast(1 as decimal(10, 0)) / cast(1 as tinyint) FROM t +-- !query 73 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) / CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0))):decimal(16,6)> +-- !query 73 output +1 + + +-- !query 74 +SELECT cast(1 as decimal(10, 0)) / cast(1 as smallint) FROM t +-- !query 74 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) / CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0))):decimal(16,6)> +-- !query 74 output +1 + + +-- !query 75 +SELECT cast(1 as decimal(10, 0)) / cast(1 as int) FROM t +-- !query 75 schema +struct<(CAST(1 AS DECIMAL(10,0)) / CAST(CAST(1 AS INT) AS DECIMAL(10,0))):decimal(21,11)> +-- !query 75 output +1 + + +-- !query 76 +SELECT cast(1 as decimal(10, 0)) / cast(1 as bigint) FROM t +-- !query 76 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) / CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(31,21)> +-- !query 76 output +1 + + +-- !query 77 +SELECT cast(1 as decimal(10, 0)) / cast(1 as float) FROM t +-- !query 77 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 77 output +1.0 + + +-- !query 78 +SELECT cast(1 as decimal(10, 0)) / cast(1 as double) FROM t +-- !query 78 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) / CAST(1 AS DOUBLE)):double> +-- !query 78 output +1.0 + + +-- !query 79 +SELECT cast(1 as decimal(10, 0)) / cast(1 as decimal(10, 0)) FROM t +-- !query 79 schema +struct<(CAST(1 AS DECIMAL(10,0)) / CAST(1 AS DECIMAL(10,0))):decimal(21,11)> +-- !query 79 output +1 + + +-- !query 80 +SELECT cast(1 as decimal(10, 0)) / cast(1 as string) FROM t +-- !query 80 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) / CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 80 output +1.0 + + +-- !query 81 +SELECT cast(1 as decimal(10, 0)) / cast('1' as binary) FROM t +-- !query 81 schema +struct<> +-- !query 81 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) / CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 + + +-- !query 82 +SELECT cast(1 as decimal(10, 0)) / cast(1 as boolean) FROM t +-- !query 82 schema +struct<> +-- !query 82 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) / CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 + + +-- !query 83 +SELECT cast(1 as decimal(10, 0)) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 83 schema +struct<> +-- !query 83 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS 
DECIMAL(10,0)) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 + + +-- !query 84 +SELECT cast(1 as decimal(10, 0)) / cast('2017-12-11 09:30:00' as date) FROM t +-- !query 84 schema +struct<> +-- !query 84 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) / CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 + + +-- !query 85 +SELECT cast(1 as string) / cast(1 as tinyint) FROM t +-- !query 85 schema +struct<(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> +-- !query 85 output +1.0 + + +-- !query 86 +SELECT cast(1 as string) / cast(1 as smallint) FROM t +-- !query 86 schema +struct<(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> +-- !query 86 output +1.0 + + +-- !query 87 +SELECT cast(1 as string) / cast(1 as int) FROM t +-- !query 87 schema +struct<(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST(CAST(1 AS INT) AS DOUBLE)):double> +-- !query 87 output +1.0 + + +-- !query 88 +SELECT cast(1 as string) / cast(1 as bigint) FROM t +-- !query 88 schema +struct<(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> +-- !query 88 output +1.0 + + +-- !query 89 +SELECT cast(1 as string) / cast(1 as float) FROM t +-- !query 89 schema +struct<(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 89 output +1.0 + + +-- !query 90 +SELECT cast(1 as string) / cast(1 as double) FROM t +-- !query 90 schema +struct<(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST(1 AS DOUBLE)):double> +-- !query 90 output +1.0 + + +-- !query 91 +SELECT cast(1 as string) / cast(1 as decimal(10, 0)) FROM t +-- !query 91 schema +struct<(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> +-- !query 91 output +1.0 + + +-- !query 92 +SELECT cast(1 as string) / cast(1 as string) FROM t +-- !query 92 schema +struct<(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST(CAST(1 AS STRING) AS DOUBLE)):double> +-- !query 92 output +1.0 + + +-- !query 93 +SELECT cast(1 as string) / cast('1' as binary) FROM t +-- !query 93 schema +struct<> +-- !query 93 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST('1' AS BINARY))' (double and binary).; line 1 pos 7 + + +-- !query 94 +SELECT cast(1 as string) / cast(1 as boolean) FROM t +-- !query 94 schema +struct<> +-- !query 94 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST(1 AS BOOLEAN))' (double and boolean).; line 1 pos 7 + + +-- !query 95 +SELECT cast(1 as string) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 95 schema +struct<> +-- !query 95 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (double and timestamp).; line 1 pos 7 + + +-- !query 96 +SELECT cast(1 as string) / cast('2017-12-11 09:30:00' as date) FROM t +-- !query 96 schema +struct<> +-- !query 96 output 
+org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST('2017-12-11 09:30:00' AS DATE))' (double and date).; line 1 pos 7 + + +-- !query 97 +SELECT cast('1' as binary) / cast(1 as tinyint) FROM t +-- !query 97 schema +struct<> +-- !query 97 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS TINYINT))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS TINYINT))' (binary and tinyint).; line 1 pos 7 + + +-- !query 98 +SELECT cast('1' as binary) / cast(1 as smallint) FROM t +-- !query 98 schema +struct<> +-- !query 98 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS SMALLINT))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS SMALLINT))' (binary and smallint).; line 1 pos 7 + + +-- !query 99 +SELECT cast('1' as binary) / cast(1 as int) FROM t +-- !query 99 schema +struct<> +-- !query 99 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS INT))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS INT))' (binary and int).; line 1 pos 7 + + +-- !query 100 +SELECT cast('1' as binary) / cast(1 as bigint) FROM t +-- !query 100 schema +struct<> +-- !query 100 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS BIGINT))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS BIGINT))' (binary and bigint).; line 1 pos 7 + + +-- !query 101 +SELECT cast('1' as binary) / cast(1 as float) FROM t +-- !query 101 schema +struct<> +-- !query 101 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS FLOAT))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS FLOAT))' (binary and float).; line 1 pos 7 + + +-- !query 102 +SELECT cast('1' as binary) / cast(1 as double) FROM t +-- !query 102 schema +struct<> +-- !query 102 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS DOUBLE))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS DOUBLE))' (binary and double).; line 1 pos 7 + + +-- !query 103 +SELECT cast('1' as binary) / cast(1 as decimal(10, 0)) FROM t +-- !query 103 schema +struct<> +-- !query 103 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 + + +-- !query 104 +SELECT cast('1' as binary) / cast(1 as string) FROM t +-- !query 104 schema +struct<> +-- !query 104 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) / CAST(CAST(1 AS STRING) AS DOUBLE))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(CAST(1 AS STRING) AS DOUBLE))' (binary and double).; line 1 pos 7 + + +-- !query 105 +SELECT cast('1' as binary) / cast('1' as binary) FROM t +-- !query 105 schema +struct<> +-- !query 105 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) / CAST('1' AS BINARY))' due to data type mismatch: '(CAST('1' AS BINARY) / CAST('1' AS BINARY))' requires (double or decimal) type, not binary; line 1 pos 7 + + +-- !query 106 +SELECT 
cast('1' as binary) / cast(1 as boolean) FROM t +-- !query 106 schema +struct<> +-- !query 106 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS BOOLEAN))' (binary and boolean).; line 1 pos 7 + + +-- !query 107 +SELECT cast('1' as binary) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 107 schema +struct<> +-- !query 107 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (binary and timestamp).; line 1 pos 7 + + +-- !query 108 +SELECT cast('1' as binary) / cast('2017-12-11 09:30:00' as date) FROM t +-- !query 108 schema +struct<> +-- !query 108 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST('2017-12-11 09:30:00' AS DATE))' (binary and date).; line 1 pos 7 + + +-- !query 109 +SELECT cast(1 as boolean) / cast(1 as tinyint) FROM t +-- !query 109 schema +struct<> +-- !query 109 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BOOLEAN) / CAST(1 AS TINYINT))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST(1 AS TINYINT))' (boolean and tinyint).; line 1 pos 7 + + +-- !query 110 +SELECT cast(1 as boolean) / cast(1 as smallint) FROM t +-- !query 110 schema +struct<> +-- !query 110 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BOOLEAN) / CAST(1 AS SMALLINT))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST(1 AS SMALLINT))' (boolean and smallint).; line 1 pos 7 + + +-- !query 111 +SELECT cast(1 as boolean) / cast(1 as int) FROM t +-- !query 111 schema +struct<> +-- !query 111 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BOOLEAN) / CAST(1 AS INT))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST(1 AS INT))' (boolean and int).; line 1 pos 7 + + +-- !query 112 +SELECT cast(1 as boolean) / cast(1 as bigint) FROM t +-- !query 112 schema +struct<> +-- !query 112 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BOOLEAN) / CAST(1 AS BIGINT))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST(1 AS BIGINT))' (boolean and bigint).; line 1 pos 7 + + +-- !query 113 +SELECT cast(1 as boolean) / cast(1 as float) FROM t +-- !query 113 schema +struct<> +-- !query 113 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BOOLEAN) / CAST(1 AS FLOAT))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST(1 AS FLOAT))' (boolean and float).; line 1 pos 7 + + +-- !query 114 +SELECT cast(1 as boolean) / cast(1 as double) FROM t +-- !query 114 schema +struct<> +-- !query 114 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BOOLEAN) / CAST(1 AS DOUBLE))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST(1 AS DOUBLE))' (boolean and double).; line 1 pos 7 + + +-- !query 115 +SELECT cast(1 as boolean) / cast(1 as decimal(10, 0)) FROM t +-- !query 115 schema +struct<> +-- !query 115 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BOOLEAN) / CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in 
'(CAST(1 AS BOOLEAN) / CAST(1 AS DECIMAL(10,0)))' (boolean and decimal(10,0)).; line 1 pos 7 + + +-- !query 116 +SELECT cast(1 as boolean) / cast(1 as string) FROM t +-- !query 116 schema +struct<> +-- !query 116 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BOOLEAN) / CAST(CAST(1 AS STRING) AS DOUBLE))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST(CAST(1 AS STRING) AS DOUBLE))' (boolean and double).; line 1 pos 7 + + +-- !query 117 +SELECT cast(1 as boolean) / cast('1' as binary) FROM t +-- !query 117 schema +struct<> +-- !query 117 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BOOLEAN) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST('1' AS BINARY))' (boolean and binary).; line 1 pos 7 + + +-- !query 118 +SELECT cast(1 as boolean) / cast(1 as boolean) FROM t +-- !query 118 schema +struct<> +-- !query 118 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BOOLEAN) / CAST(1 AS BOOLEAN))' due to data type mismatch: '(CAST(1 AS BOOLEAN) / CAST(1 AS BOOLEAN))' requires (double or decimal) type, not boolean; line 1 pos 7 + + +-- !query 119 +SELECT cast(1 as boolean) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 119 schema +struct<> +-- !query 119 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BOOLEAN) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (boolean and timestamp).; line 1 pos 7 + + +-- !query 120 +SELECT cast(1 as boolean) / cast('2017-12-11 09:30:00' as date) FROM t +-- !query 120 schema +struct<> +-- !query 120 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BOOLEAN) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST('2017-12-11 09:30:00' AS DATE))' (boolean and date).; line 1 pos 7 + + +-- !query 121 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as tinyint) FROM t +-- !query 121 schema +struct<> +-- !query 121 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS TINYINT))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS TINYINT))' (timestamp and tinyint).; line 1 pos 7 + + +-- !query 122 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as smallint) FROM t +-- !query 122 schema +struct<> +-- !query 122 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS SMALLINT))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS SMALLINT))' (timestamp and smallint).; line 1 pos 7 + + +-- !query 123 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as int) FROM t +-- !query 123 schema +struct<> +-- !query 123 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS INT))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS INT))' (timestamp and int).; line 1 pos 7 + + +-- !query 124 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as bigint) FROM t +-- !query 124 schema +struct<> +-- !query 124 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / 
CAST(1 AS BIGINT))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS BIGINT))' (timestamp and bigint).; line 1 pos 7 + + +-- !query 125 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as float) FROM t +-- !query 125 schema +struct<> +-- !query 125 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS FLOAT))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS FLOAT))' (timestamp and float).; line 1 pos 7 + + +-- !query 126 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as double) FROM t +-- !query 126 schema +struct<> +-- !query 126 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DOUBLE))' (timestamp and double).; line 1 pos 7 + + +-- !query 127 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as decimal(10, 0)) FROM t +-- !query 127 schema +struct<> +-- !query 127 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 + + +-- !query 128 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as string) FROM t +-- !query 128 schema +struct<> +-- !query 128 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(CAST(1 AS STRING) AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(CAST(1 AS STRING) AS DOUBLE))' (timestamp and double).; line 1 pos 7 + + +-- !query 129 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast('1' as binary) FROM t +-- !query 129 schema +struct<> +-- !query 129 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST('1' AS BINARY))' (timestamp and binary).; line 1 pos 7 + + +-- !query 130 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as boolean) FROM t +-- !query 130 schema +struct<> +-- !query 130 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS BOOLEAN))' (timestamp and boolean).; line 1 pos 7 + + +-- !query 131 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 131 schema +struct<> +-- !query 131 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' requires (double or decimal) type, not timestamp; line 1 pos 7 + + +-- !query 132 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast('2017-12-11 09:30:00' as date) FROM t +-- !query 132 schema +struct<> +-- !query 132 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS 
TIMESTAMP) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST('2017-12-11 09:30:00' AS DATE))' (timestamp and date).; line 1 pos 7 + + +-- !query 133 +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as tinyint) FROM t +-- !query 133 schema +struct<> +-- !query 133 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS TINYINT))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS TINYINT))' (date and tinyint).; line 1 pos 7 + + +-- !query 134 +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as smallint) FROM t +-- !query 134 schema +struct<> +-- !query 134 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS SMALLINT))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS SMALLINT))' (date and smallint).; line 1 pos 7 + + +-- !query 135 +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as int) FROM t +-- !query 135 schema +struct<> +-- !query 135 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS INT))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS INT))' (date and int).; line 1 pos 7 + + +-- !query 136 +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as bigint) FROM t +-- !query 136 schema +struct<> +-- !query 136 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS BIGINT))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS BIGINT))' (date and bigint).; line 1 pos 7 + + +-- !query 137 +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as float) FROM t +-- !query 137 schema +struct<> +-- !query 137 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS FLOAT))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS FLOAT))' (date and float).; line 1 pos 7 + + +-- !query 138 +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as double) FROM t +-- !query 138 schema +struct<> +-- !query 138 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS DOUBLE))' (date and double).; line 1 pos 7 + + +-- !query 139 +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as decimal(10, 0)) FROM t +-- !query 139 schema +struct<> +-- !query 139 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 + + +-- !query 140 +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as string) FROM t +-- !query 140 schema +struct<> +-- !query 140 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(CAST(1 AS STRING) AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(CAST(1 AS STRING) AS DOUBLE))' (date and double).; line 1 pos 7 + + +-- !query 141 +SELECT cast('2017-12-11 09:30:00' as date) / cast('1' as binary) 
FROM t +-- !query 141 schema +struct<> +-- !query 141 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST('1' AS BINARY))' (date and binary).; line 1 pos 7 + + +-- !query 142 +SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as boolean) FROM t +-- !query 142 schema +struct<> +-- !query 142 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS BOOLEAN))' (date and boolean).; line 1 pos 7 + + +-- !query 143 +SELECT cast('2017-12-11 09:30:00' as date) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 143 schema +struct<> +-- !query 143 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (date and timestamp).; line 1 pos 7 + + +-- !query 144 +SELECT cast('2017-12-11 09:30:00' as date) / cast('2017-12-11 09:30:00' as date) FROM t +-- !query 144 schema +struct<> +-- !query 144 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: '(CAST('2017-12-11 09:30:00' AS DATE) / CAST('2017-12-11 09:30:00' AS DATE))' requires (double or decimal) type, not date; line 1 pos 7 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/elt.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/elt.sql.out new file mode 100644 index 0000000000000..b62e1b6826045 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/elt.sql.out @@ -0,0 +1,115 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 6 + + +-- !query 0 +SELECT elt(2, col1, col2, col3, col4, col5) col +FROM ( + SELECT + 'prefix_' col1, + id col2, + string(id + 1) col3, + encode(string(id + 2), 'utf-8') col4, + CAST(id AS DOUBLE) col5 + FROM range(10) +) +-- !query 0 schema +struct<col:string> +-- !query 0 output +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 + + +-- !query 1 +SELECT elt(3, col1, col2, col3, col4) col +FROM ( + SELECT + string(id) col1, + string(id + 1) col2, + encode(string(id + 2), 'utf-8') col3, + encode(string(id + 3), 'utf-8') col4 + FROM range(10) +) +-- !query 1 schema +struct<col:string> +-- !query 1 output +10 +11 +2 +3 +4 +5 +6 +7 +8 +9 + + +-- !query 2 +set spark.sql.function.eltOutputAsString=true +-- !query 2 schema +struct<key:string,value:string> +-- !query 2 output +spark.sql.function.eltOutputAsString true + + +-- !query 3 +SELECT elt(1, col1, col2) col +FROM ( + SELECT + encode(string(id), 'utf-8') col1, + encode(string(id + 1), 'utf-8') col2 + FROM range(10) +) +-- !query 3 schema +struct<col:string> +-- !query 3 output +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 + + +-- !query 4 +set spark.sql.function.eltOutputAsString=false +-- !query 4 schema +struct<key:string,value:string> +-- !query 4 output +spark.sql.function.eltOutputAsString false + + +-- !query 5 +SELECT elt(2, col1, col2) col +FROM ( + SELECT + encode(string(id), 'utf-8') col1, + encode(string(id + 1), 'utf-8') col2 + FROM range(10) +) +-- !query 5 schema +struct<col:binary> +-- !query 5 output +1 +10 +2 +3 +4 +5 +6 +7 +8 +9 diff --git 
a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/ifCoercion.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/ifCoercion.sql.out new file mode 100644 index 0000000000000..7097027872707 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/ifCoercion.sql.out @@ -0,0 +1,1232 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 145 + + +-- !query 0 +CREATE TEMPORARY VIEW t AS SELECT 1 +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +SELECT IF(true, cast(1 as tinyint), cast(2 as tinyint)) FROM t +-- !query 1 schema +struct<(IF(true, CAST(1 AS TINYINT), CAST(2 AS TINYINT))):tinyint> +-- !query 1 output +1 + + +-- !query 2 +SELECT IF(true, cast(1 as tinyint), cast(2 as smallint)) FROM t +-- !query 2 schema +struct<(IF(true, CAST(CAST(1 AS TINYINT) AS SMALLINT), CAST(2 AS SMALLINT))):smallint> +-- !query 2 output +1 + + +-- !query 3 +SELECT IF(true, cast(1 as tinyint), cast(2 as int)) FROM t +-- !query 3 schema +struct<(IF(true, CAST(CAST(1 AS TINYINT) AS INT), CAST(2 AS INT))):int> +-- !query 3 output +1 + + +-- !query 4 +SELECT IF(true, cast(1 as tinyint), cast(2 as bigint)) FROM t +-- !query 4 schema +struct<(IF(true, CAST(CAST(1 AS TINYINT) AS BIGINT), CAST(2 AS BIGINT))):bigint> +-- !query 4 output +1 + + +-- !query 5 +SELECT IF(true, cast(1 as tinyint), cast(2 as float)) FROM t +-- !query 5 schema +struct<(IF(true, CAST(CAST(1 AS TINYINT) AS FLOAT), CAST(2 AS FLOAT))):float> +-- !query 5 output +1.0 + + +-- !query 6 +SELECT IF(true, cast(1 as tinyint), cast(2 as double)) FROM t +-- !query 6 schema +struct<(IF(true, CAST(CAST(1 AS TINYINT) AS DOUBLE), CAST(2 AS DOUBLE))):double> +-- !query 6 output +1.0 + + +-- !query 7 +SELECT IF(true, cast(1 as tinyint), cast(2 as decimal(10, 0))) FROM t +-- !query 7 schema +struct<(IF(true, CAST(CAST(1 AS TINYINT) AS DECIMAL(10,0)), CAST(2 AS DECIMAL(10,0)))):decimal(10,0)> +-- !query 7 output +1 + + +-- !query 8 +SELECT IF(true, cast(1 as tinyint), cast(2 as string)) FROM t +-- !query 8 schema +struct<(IF(true, CAST(CAST(1 AS TINYINT) AS STRING), CAST(2 AS STRING))):string> +-- !query 8 output +1 + + +-- !query 9 +SELECT IF(true, cast(1 as tinyint), cast('2' as binary)) FROM t +-- !query 9 schema +struct<> +-- !query 9 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS TINYINT), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS TINYINT), CAST('2' AS BINARY)))' (tinyint and binary).; line 1 pos 7 + + +-- !query 10 +SELECT IF(true, cast(1 as tinyint), cast(2 as boolean)) FROM t +-- !query 10 schema +struct<> +-- !query 10 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS TINYINT), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS TINYINT), CAST(2 AS BOOLEAN)))' (tinyint and boolean).; line 1 pos 7 + + +-- !query 11 +SELECT IF(true, cast(1 as tinyint), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 11 schema +struct<> +-- !query 11 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS TINYINT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS TINYINT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' (tinyint and timestamp).; line 1 pos 7 + + +-- !query 12 +SELECT IF(true, cast(1 as tinyint), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 12 schema +struct<> +-- !query 12 output 
+org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS TINYINT), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS TINYINT), CAST('2017-12-11 09:30:00' AS DATE)))' (tinyint and date).; line 1 pos 7 + + +-- !query 13 +SELECT IF(true, cast(1 as smallint), cast(2 as tinyint)) FROM t +-- !query 13 schema +struct<(IF(true, CAST(1 AS SMALLINT), CAST(CAST(2 AS TINYINT) AS SMALLINT))):smallint> +-- !query 13 output +1 + + +-- !query 14 +SELECT IF(true, cast(1 as smallint), cast(2 as smallint)) FROM t +-- !query 14 schema +struct<(IF(true, CAST(1 AS SMALLINT), CAST(2 AS SMALLINT))):smallint> +-- !query 14 output +1 + + +-- !query 15 +SELECT IF(true, cast(1 as smallint), cast(2 as int)) FROM t +-- !query 15 schema +struct<(IF(true, CAST(CAST(1 AS SMALLINT) AS INT), CAST(2 AS INT))):int> +-- !query 15 output +1 + + +-- !query 16 +SELECT IF(true, cast(1 as smallint), cast(2 as bigint)) FROM t +-- !query 16 schema +struct<(IF(true, CAST(CAST(1 AS SMALLINT) AS BIGINT), CAST(2 AS BIGINT))):bigint> +-- !query 16 output +1 + + +-- !query 17 +SELECT IF(true, cast(1 as smallint), cast(2 as float)) FROM t +-- !query 17 schema +struct<(IF(true, CAST(CAST(1 AS SMALLINT) AS FLOAT), CAST(2 AS FLOAT))):float> +-- !query 17 output +1.0 + + +-- !query 18 +SELECT IF(true, cast(1 as smallint), cast(2 as double)) FROM t +-- !query 18 schema +struct<(IF(true, CAST(CAST(1 AS SMALLINT) AS DOUBLE), CAST(2 AS DOUBLE))):double> +-- !query 18 output +1.0 + + +-- !query 19 +SELECT IF(true, cast(1 as smallint), cast(2 as decimal(10, 0))) FROM t +-- !query 19 schema +struct<(IF(true, CAST(CAST(1 AS SMALLINT) AS DECIMAL(10,0)), CAST(2 AS DECIMAL(10,0)))):decimal(10,0)> +-- !query 19 output +1 + + +-- !query 20 +SELECT IF(true, cast(1 as smallint), cast(2 as string)) FROM t +-- !query 20 schema +struct<(IF(true, CAST(CAST(1 AS SMALLINT) AS STRING), CAST(2 AS STRING))):string> +-- !query 20 output +1 + + +-- !query 21 +SELECT IF(true, cast(1 as smallint), cast('2' as binary)) FROM t +-- !query 21 schema +struct<> +-- !query 21 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS SMALLINT), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS SMALLINT), CAST('2' AS BINARY)))' (smallint and binary).; line 1 pos 7 + + +-- !query 22 +SELECT IF(true, cast(1 as smallint), cast(2 as boolean)) FROM t +-- !query 22 schema +struct<> +-- !query 22 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS SMALLINT), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS SMALLINT), CAST(2 AS BOOLEAN)))' (smallint and boolean).; line 1 pos 7 + + +-- !query 23 +SELECT IF(true, cast(1 as smallint), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 23 schema +struct<> +-- !query 23 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS SMALLINT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS SMALLINT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' (smallint and timestamp).; line 1 pos 7 + + +-- !query 24 +SELECT IF(true, cast(1 as smallint), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 24 schema +struct<> +-- !query 24 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS SMALLINT), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS SMALLINT), 
CAST('2017-12-11 09:30:00' AS DATE)))' (smallint and date).; line 1 pos 7 + + +-- !query 25 +SELECT IF(true, cast(1 as int), cast(2 as tinyint)) FROM t +-- !query 25 schema +struct<(IF(true, CAST(1 AS INT), CAST(CAST(2 AS TINYINT) AS INT))):int> +-- !query 25 output +1 + + +-- !query 26 +SELECT IF(true, cast(1 as int), cast(2 as smallint)) FROM t +-- !query 26 schema +struct<(IF(true, CAST(1 AS INT), CAST(CAST(2 AS SMALLINT) AS INT))):int> +-- !query 26 output +1 + + +-- !query 27 +SELECT IF(true, cast(1 as int), cast(2 as int)) FROM t +-- !query 27 schema +struct<(IF(true, CAST(1 AS INT), CAST(2 AS INT))):int> +-- !query 27 output +1 + + +-- !query 28 +SELECT IF(true, cast(1 as int), cast(2 as bigint)) FROM t +-- !query 28 schema +struct<(IF(true, CAST(CAST(1 AS INT) AS BIGINT), CAST(2 AS BIGINT))):bigint> +-- !query 28 output +1 + + +-- !query 29 +SELECT IF(true, cast(1 as int), cast(2 as float)) FROM t +-- !query 29 schema +struct<(IF(true, CAST(CAST(1 AS INT) AS FLOAT), CAST(2 AS FLOAT))):float> +-- !query 29 output +1.0 + + +-- !query 30 +SELECT IF(true, cast(1 as int), cast(2 as double)) FROM t +-- !query 30 schema +struct<(IF(true, CAST(CAST(1 AS INT) AS DOUBLE), CAST(2 AS DOUBLE))):double> +-- !query 30 output +1.0 + + +-- !query 31 +SELECT IF(true, cast(1 as int), cast(2 as decimal(10, 0))) FROM t +-- !query 31 schema +struct<(IF(true, CAST(CAST(1 AS INT) AS DECIMAL(10,0)), CAST(2 AS DECIMAL(10,0)))):decimal(10,0)> +-- !query 31 output +1 + + +-- !query 32 +SELECT IF(true, cast(1 as int), cast(2 as string)) FROM t +-- !query 32 schema +struct<(IF(true, CAST(CAST(1 AS INT) AS STRING), CAST(2 AS STRING))):string> +-- !query 32 output +1 + + +-- !query 33 +SELECT IF(true, cast(1 as int), cast('2' as binary)) FROM t +-- !query 33 schema +struct<> +-- !query 33 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS INT), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS INT), CAST('2' AS BINARY)))' (int and binary).; line 1 pos 7 + + +-- !query 34 +SELECT IF(true, cast(1 as int), cast(2 as boolean)) FROM t +-- !query 34 schema +struct<> +-- !query 34 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS INT), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS INT), CAST(2 AS BOOLEAN)))' (int and boolean).; line 1 pos 7 + + +-- !query 35 +SELECT IF(true, cast(1 as int), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 35 schema +struct<> +-- !query 35 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS INT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS INT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' (int and timestamp).; line 1 pos 7 + + +-- !query 36 +SELECT IF(true, cast(1 as int), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 36 schema +struct<> +-- !query 36 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS INT), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS INT), CAST('2017-12-11 09:30:00' AS DATE)))' (int and date).; line 1 pos 7 + + +-- !query 37 +SELECT IF(true, cast(1 as bigint), cast(2 as tinyint)) FROM t +-- !query 37 schema +struct<(IF(true, CAST(1 AS BIGINT), CAST(CAST(2 AS TINYINT) AS BIGINT))):bigint> +-- !query 37 output +1 + + +-- !query 38 +SELECT IF(true, cast(1 as bigint), cast(2 as smallint)) FROM t +-- !query 38 
schema +struct<(IF(true, CAST(1 AS BIGINT), CAST(CAST(2 AS SMALLINT) AS BIGINT))):bigint> +-- !query 38 output +1 + + +-- !query 39 +SELECT IF(true, cast(1 as bigint), cast(2 as int)) FROM t +-- !query 39 schema +struct<(IF(true, CAST(1 AS BIGINT), CAST(CAST(2 AS INT) AS BIGINT))):bigint> +-- !query 39 output +1 + + +-- !query 40 +SELECT IF(true, cast(1 as bigint), cast(2 as bigint)) FROM t +-- !query 40 schema +struct<(IF(true, CAST(1 AS BIGINT), CAST(2 AS BIGINT))):bigint> +-- !query 40 output +1 + + +-- !query 41 +SELECT IF(true, cast(1 as bigint), cast(2 as float)) FROM t +-- !query 41 schema +struct<(IF(true, CAST(CAST(1 AS BIGINT) AS FLOAT), CAST(2 AS FLOAT))):float> +-- !query 41 output +1.0 + + +-- !query 42 +SELECT IF(true, cast(1 as bigint), cast(2 as double)) FROM t +-- !query 42 schema +struct<(IF(true, CAST(CAST(1 AS BIGINT) AS DOUBLE), CAST(2 AS DOUBLE))):double> +-- !query 42 output +1.0 + + +-- !query 43 +SELECT IF(true, cast(1 as bigint), cast(2 as decimal(10, 0))) FROM t +-- !query 43 schema +struct<(IF(true, CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)), CAST(CAST(2 AS DECIMAL(10,0)) AS DECIMAL(20,0)))):decimal(20,0)> +-- !query 43 output +1 + + +-- !query 44 +SELECT IF(true, cast(1 as bigint), cast(2 as string)) FROM t +-- !query 44 schema +struct<(IF(true, CAST(CAST(1 AS BIGINT) AS STRING), CAST(2 AS STRING))):string> +-- !query 44 output +1 + + +-- !query 45 +SELECT IF(true, cast(1 as bigint), cast('2' as binary)) FROM t +-- !query 45 schema +struct<> +-- !query 45 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS BIGINT), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BIGINT), CAST('2' AS BINARY)))' (bigint and binary).; line 1 pos 7 + + +-- !query 46 +SELECT IF(true, cast(1 as bigint), cast(2 as boolean)) FROM t +-- !query 46 schema +struct<> +-- !query 46 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS BIGINT), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BIGINT), CAST(2 AS BOOLEAN)))' (bigint and boolean).; line 1 pos 7 + + +-- !query 47 +SELECT IF(true, cast(1 as bigint), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 47 schema +struct<> +-- !query 47 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS BIGINT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BIGINT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' (bigint and timestamp).; line 1 pos 7 + + +-- !query 48 +SELECT IF(true, cast(1 as bigint), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 48 schema +struct<> +-- !query 48 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS BIGINT), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BIGINT), CAST('2017-12-11 09:30:00' AS DATE)))' (bigint and date).; line 1 pos 7 + + +-- !query 49 +SELECT IF(true, cast(1 as float), cast(2 as tinyint)) FROM t +-- !query 49 schema +struct<(IF(true, CAST(1 AS FLOAT), CAST(CAST(2 AS TINYINT) AS FLOAT))):float> +-- !query 49 output +1.0 + + +-- !query 50 +SELECT IF(true, cast(1 as float), cast(2 as smallint)) FROM t +-- !query 50 schema +struct<(IF(true, CAST(1 AS FLOAT), CAST(CAST(2 AS SMALLINT) AS FLOAT))):float> +-- !query 50 output +1.0 + + +-- !query 51 +SELECT IF(true, cast(1 as float), cast(2 as int)) FROM t +-- !query 51 schema +struct<(IF(true, CAST(1 AS FLOAT), 
CAST(CAST(2 AS INT) AS FLOAT))):float> +-- !query 51 output +1.0 + + +-- !query 52 +SELECT IF(true, cast(1 as float), cast(2 as bigint)) FROM t +-- !query 52 schema +struct<(IF(true, CAST(1 AS FLOAT), CAST(CAST(2 AS BIGINT) AS FLOAT))):float> +-- !query 52 output +1.0 + + +-- !query 53 +SELECT IF(true, cast(1 as float), cast(2 as float)) FROM t +-- !query 53 schema +struct<(IF(true, CAST(1 AS FLOAT), CAST(2 AS FLOAT))):float> +-- !query 53 output +1.0 + + +-- !query 54 +SELECT IF(true, cast(1 as float), cast(2 as double)) FROM t +-- !query 54 schema +struct<(IF(true, CAST(CAST(1 AS FLOAT) AS DOUBLE), CAST(2 AS DOUBLE))):double> +-- !query 54 output +1.0 + + +-- !query 55 +SELECT IF(true, cast(1 as float), cast(2 as decimal(10, 0))) FROM t +-- !query 55 schema +struct<(IF(true, CAST(CAST(1 AS FLOAT) AS DOUBLE), CAST(CAST(2 AS DECIMAL(10,0)) AS DOUBLE))):double> +-- !query 55 output +1.0 + + +-- !query 56 +SELECT IF(true, cast(1 as float), cast(2 as string)) FROM t +-- !query 56 schema +struct<(IF(true, CAST(CAST(1 AS FLOAT) AS STRING), CAST(2 AS STRING))):string> +-- !query 56 output +1.0 + + +-- !query 57 +SELECT IF(true, cast(1 as float), cast('2' as binary)) FROM t +-- !query 57 schema +struct<> +-- !query 57 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS FLOAT), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS FLOAT), CAST('2' AS BINARY)))' (float and binary).; line 1 pos 7 + + +-- !query 58 +SELECT IF(true, cast(1 as float), cast(2 as boolean)) FROM t +-- !query 58 schema +struct<> +-- !query 58 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS FLOAT), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS FLOAT), CAST(2 AS BOOLEAN)))' (float and boolean).; line 1 pos 7 + + +-- !query 59 +SELECT IF(true, cast(1 as float), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 59 schema +struct<> +-- !query 59 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS FLOAT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS FLOAT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' (float and timestamp).; line 1 pos 7 + + +-- !query 60 +SELECT IF(true, cast(1 as float), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 60 schema +struct<> +-- !query 60 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS FLOAT), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS FLOAT), CAST('2017-12-11 09:30:00' AS DATE)))' (float and date).; line 1 pos 7 + + +-- !query 61 +SELECT IF(true, cast(1 as double), cast(2 as tinyint)) FROM t +-- !query 61 schema +struct<(IF(true, CAST(1 AS DOUBLE), CAST(CAST(2 AS TINYINT) AS DOUBLE))):double> +-- !query 61 output +1.0 + + +-- !query 62 +SELECT IF(true, cast(1 as double), cast(2 as smallint)) FROM t +-- !query 62 schema +struct<(IF(true, CAST(1 AS DOUBLE), CAST(CAST(2 AS SMALLINT) AS DOUBLE))):double> +-- !query 62 output +1.0 + + +-- !query 63 +SELECT IF(true, cast(1 as double), cast(2 as int)) FROM t +-- !query 63 schema +struct<(IF(true, CAST(1 AS DOUBLE), CAST(CAST(2 AS INT) AS DOUBLE))):double> +-- !query 63 output +1.0 + + +-- !query 64 +SELECT IF(true, cast(1 as double), cast(2 as bigint)) FROM t +-- !query 64 schema +struct<(IF(true, CAST(1 AS DOUBLE), CAST(CAST(2 AS BIGINT) AS DOUBLE))):double> +-- !query 64 output +1.0 + + +-- 
!query 65 +SELECT IF(true, cast(1 as double), cast(2 as float)) FROM t +-- !query 65 schema +struct<(IF(true, CAST(1 AS DOUBLE), CAST(CAST(2 AS FLOAT) AS DOUBLE))):double> +-- !query 65 output +1.0 + + +-- !query 66 +SELECT IF(true, cast(1 as double), cast(2 as double)) FROM t +-- !query 66 schema +struct<(IF(true, CAST(1 AS DOUBLE), CAST(2 AS DOUBLE))):double> +-- !query 66 output +1.0 + + +-- !query 67 +SELECT IF(true, cast(1 as double), cast(2 as decimal(10, 0))) FROM t +-- !query 67 schema +struct<(IF(true, CAST(1 AS DOUBLE), CAST(CAST(2 AS DECIMAL(10,0)) AS DOUBLE))):double> +-- !query 67 output +1.0 + + +-- !query 68 +SELECT IF(true, cast(1 as double), cast(2 as string)) FROM t +-- !query 68 schema +struct<(IF(true, CAST(CAST(1 AS DOUBLE) AS STRING), CAST(2 AS STRING))):string> +-- !query 68 output +1.0 + + +-- !query 69 +SELECT IF(true, cast(1 as double), cast('2' as binary)) FROM t +-- !query 69 schema +struct<> +-- !query 69 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS DOUBLE), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS DOUBLE), CAST('2' AS BINARY)))' (double and binary).; line 1 pos 7 + + +-- !query 70 +SELECT IF(true, cast(1 as double), cast(2 as boolean)) FROM t +-- !query 70 schema +struct<> +-- !query 70 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS DOUBLE), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS DOUBLE), CAST(2 AS BOOLEAN)))' (double and boolean).; line 1 pos 7 + + +-- !query 71 +SELECT IF(true, cast(1 as double), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 71 schema +struct<> +-- !query 71 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS DOUBLE), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS DOUBLE), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' (double and timestamp).; line 1 pos 7 + + +-- !query 72 +SELECT IF(true, cast(1 as double), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 72 schema +struct<> +-- !query 72 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS DOUBLE), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS DOUBLE), CAST('2017-12-11 09:30:00' AS DATE)))' (double and date).; line 1 pos 7 + + +-- !query 73 +SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as tinyint)) FROM t +-- !query 73 schema +struct<(IF(true, CAST(1 AS DECIMAL(10,0)), CAST(CAST(2 AS TINYINT) AS DECIMAL(10,0)))):decimal(10,0)> +-- !query 73 output +1 + + +-- !query 74 +SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as smallint)) FROM t +-- !query 74 schema +struct<(IF(true, CAST(1 AS DECIMAL(10,0)), CAST(CAST(2 AS SMALLINT) AS DECIMAL(10,0)))):decimal(10,0)> +-- !query 74 output +1 + + +-- !query 75 +SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as int)) FROM t +-- !query 75 schema +struct<(IF(true, CAST(1 AS DECIMAL(10,0)), CAST(CAST(2 AS INT) AS DECIMAL(10,0)))):decimal(10,0)> +-- !query 75 output +1 + + +-- !query 76 +SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as bigint)) FROM t +-- !query 76 schema +struct<(IF(true, CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)), CAST(CAST(2 AS BIGINT) AS DECIMAL(20,0)))):decimal(20,0)> +-- !query 76 output +1 + + +-- !query 77 +SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as float)) FROM t +-- !query 77 schema +struct<(IF(true, CAST(CAST(1 AS 
DECIMAL(10,0)) AS DOUBLE), CAST(CAST(2 AS FLOAT) AS DOUBLE))):double> +-- !query 77 output +1.0 + + +-- !query 78 +SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as double)) FROM t +-- !query 78 schema +struct<(IF(true, CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE), CAST(2 AS DOUBLE))):double> +-- !query 78 output +1.0 + + +-- !query 79 +SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as decimal(10, 0))) FROM t +-- !query 79 schema +struct<(IF(true, CAST(1 AS DECIMAL(10,0)), CAST(2 AS DECIMAL(10,0)))):decimal(10,0)> +-- !query 79 output +1 + + +-- !query 80 +SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as string)) FROM t +-- !query 80 schema +struct<(IF(true, CAST(CAST(1 AS DECIMAL(10,0)) AS STRING), CAST(2 AS STRING))):string> +-- !query 80 output +1 + + +-- !query 81 +SELECT IF(true, cast(1 as decimal(10, 0)), cast('2' as binary)) FROM t +-- !query 81 schema +struct<> +-- !query 81 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS DECIMAL(10,0)), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS DECIMAL(10,0)), CAST('2' AS BINARY)))' (decimal(10,0) and binary).; line 1 pos 7 + + +-- !query 82 +SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as boolean)) FROM t +-- !query 82 schema +struct<> +-- !query 82 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS DECIMAL(10,0)), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS DECIMAL(10,0)), CAST(2 AS BOOLEAN)))' (decimal(10,0) and boolean).; line 1 pos 7 + + +-- !query 83 +SELECT IF(true, cast(1 as decimal(10, 0)), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 83 schema +struct<> +-- !query 83 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' (decimal(10,0) and timestamp).; line 1 pos 7 + + +-- !query 84 +SELECT IF(true, cast(1 as decimal(10, 0)), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 84 schema +struct<> +-- !query 84 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00' AS DATE)))' (decimal(10,0) and date).; line 1 pos 7 + + +-- !query 85 +SELECT IF(true, cast(1 as string), cast(2 as tinyint)) FROM t +-- !query 85 schema +struct<(IF(true, CAST(1 AS STRING), CAST(CAST(2 AS TINYINT) AS STRING))):string> +-- !query 85 output +1 + + +-- !query 86 +SELECT IF(true, cast(1 as string), cast(2 as smallint)) FROM t +-- !query 86 schema +struct<(IF(true, CAST(1 AS STRING), CAST(CAST(2 AS SMALLINT) AS STRING))):string> +-- !query 86 output +1 + + +-- !query 87 +SELECT IF(true, cast(1 as string), cast(2 as int)) FROM t +-- !query 87 schema +struct<(IF(true, CAST(1 AS STRING), CAST(CAST(2 AS INT) AS STRING))):string> +-- !query 87 output +1 + + +-- !query 88 +SELECT IF(true, cast(1 as string), cast(2 as bigint)) FROM t +-- !query 88 schema +struct<(IF(true, CAST(1 AS STRING), CAST(CAST(2 AS BIGINT) AS STRING))):string> +-- !query 88 output +1 + + +-- !query 89 +SELECT IF(true, cast(1 as string), cast(2 as float)) FROM t +-- !query 89 schema +struct<(IF(true, CAST(1 AS STRING), CAST(CAST(2 AS FLOAT) AS STRING))):string> +-- !query 89 output +1 + + +-- !query 90 
+SELECT IF(true, cast(1 as string), cast(2 as double)) FROM t +-- !query 90 schema +struct<(IF(true, CAST(1 AS STRING), CAST(CAST(2 AS DOUBLE) AS STRING))):string> +-- !query 90 output +1 + + +-- !query 91 +SELECT IF(true, cast(1 as string), cast(2 as decimal(10, 0))) FROM t +-- !query 91 schema +struct<(IF(true, CAST(1 AS STRING), CAST(CAST(2 AS DECIMAL(10,0)) AS STRING))):string> +-- !query 91 output +1 + + +-- !query 92 +SELECT IF(true, cast(1 as string), cast(2 as string)) FROM t +-- !query 92 schema +struct<(IF(true, CAST(1 AS STRING), CAST(2 AS STRING))):string> +-- !query 92 output +1 + + +-- !query 93 +SELECT IF(true, cast(1 as string), cast('2' as binary)) FROM t +-- !query 93 schema +struct<> +-- !query 93 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS STRING), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS STRING), CAST('2' AS BINARY)))' (string and binary).; line 1 pos 7 + + +-- !query 94 +SELECT IF(true, cast(1 as string), cast(2 as boolean)) FROM t +-- !query 94 schema +struct<> +-- !query 94 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS STRING), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS STRING), CAST(2 AS BOOLEAN)))' (string and boolean).; line 1 pos 7 + + +-- !query 95 +SELECT IF(true, cast(1 as string), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 95 schema +struct<(IF(true, CAST(1 AS STRING), CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING))):string> +-- !query 95 output +1 + + +-- !query 96 +SELECT IF(true, cast(1 as string), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 96 schema +struct<(IF(true, CAST(1 AS STRING), CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING))):string> +-- !query 96 output +1 + + +-- !query 97 +SELECT IF(true, cast('1' as binary), cast(2 as tinyint)) FROM t +-- !query 97 schema +struct<> +-- !query 97 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('1' AS BINARY), CAST(2 AS TINYINT)))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST(2 AS TINYINT)))' (binary and tinyint).; line 1 pos 7 + + +-- !query 98 +SELECT IF(true, cast('1' as binary), cast(2 as smallint)) FROM t +-- !query 98 schema +struct<> +-- !query 98 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('1' AS BINARY), CAST(2 AS SMALLINT)))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST(2 AS SMALLINT)))' (binary and smallint).; line 1 pos 7 + + +-- !query 99 +SELECT IF(true, cast('1' as binary), cast(2 as int)) FROM t +-- !query 99 schema +struct<> +-- !query 99 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('1' AS BINARY), CAST(2 AS INT)))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST(2 AS INT)))' (binary and int).; line 1 pos 7 + + +-- !query 100 +SELECT IF(true, cast('1' as binary), cast(2 as bigint)) FROM t +-- !query 100 schema +struct<> +-- !query 100 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('1' AS BINARY), CAST(2 AS BIGINT)))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST(2 AS BIGINT)))' (binary and bigint).; line 1 pos 7 + + +-- !query 101 +SELECT IF(true, cast('1' as binary), cast(2 as float)) FROM t +-- !query 101 schema +struct<> +-- !query 101 output +org.apache.spark.sql.AnalysisException +cannot resolve 
'(IF(true, CAST('1' AS BINARY), CAST(2 AS FLOAT)))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST(2 AS FLOAT)))' (binary and float).; line 1 pos 7 + + +-- !query 102 +SELECT IF(true, cast('1' as binary), cast(2 as double)) FROM t +-- !query 102 schema +struct<> +-- !query 102 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('1' AS BINARY), CAST(2 AS DOUBLE)))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST(2 AS DOUBLE)))' (binary and double).; line 1 pos 7 + + +-- !query 103 +SELECT IF(true, cast('1' as binary), cast(2 as decimal(10, 0))) FROM t +-- !query 103 schema +struct<> +-- !query 103 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('1' AS BINARY), CAST(2 AS DECIMAL(10,0))))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST(2 AS DECIMAL(10,0))))' (binary and decimal(10,0)).; line 1 pos 7 + + +-- !query 104 +SELECT IF(true, cast('1' as binary), cast(2 as string)) FROM t +-- !query 104 schema +struct<> +-- !query 104 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('1' AS BINARY), CAST(2 AS STRING)))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST(2 AS STRING)))' (binary and string).; line 1 pos 7 + + +-- !query 105 +SELECT IF(true, cast('1' as binary), cast('2' as binary)) FROM t +-- !query 105 schema +struct<(IF(true, CAST(1 AS BINARY), CAST(2 AS BINARY))):binary> +-- !query 105 output +1 + + +-- !query 106 +SELECT IF(true, cast('1' as binary), cast(2 as boolean)) FROM t +-- !query 106 schema +struct<> +-- !query 106 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('1' AS BINARY), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST(2 AS BOOLEAN)))' (binary and boolean).; line 1 pos 7 + + +-- !query 107 +SELECT IF(true, cast('1' as binary), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 107 schema +struct<> +-- !query 107 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('1' AS BINARY), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' (binary and timestamp).; line 1 pos 7 + + +-- !query 108 +SELECT IF(true, cast('1' as binary), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 108 schema +struct<> +-- !query 108 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('1' AS BINARY), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST('2017-12-11 09:30:00' AS DATE)))' (binary and date).; line 1 pos 7 + + +-- !query 109 +SELECT IF(true, cast(1 as boolean), cast(2 as tinyint)) FROM t +-- !query 109 schema +struct<> +-- !query 109 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS TINYINT)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS TINYINT)))' (boolean and tinyint).; line 1 pos 7 + + +-- !query 110 +SELECT IF(true, cast(1 as boolean), cast(2 as smallint)) FROM t +-- !query 110 schema +struct<> +-- !query 110 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS SMALLINT)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS SMALLINT)))' 
(boolean and smallint).; line 1 pos 7 + + +-- !query 111 +SELECT IF(true, cast(1 as boolean), cast(2 as int)) FROM t +-- !query 111 schema +struct<> +-- !query 111 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS INT)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS INT)))' (boolean and int).; line 1 pos 7 + + +-- !query 112 +SELECT IF(true, cast(1 as boolean), cast(2 as bigint)) FROM t +-- !query 112 schema +struct<> +-- !query 112 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS BIGINT)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS BIGINT)))' (boolean and bigint).; line 1 pos 7 + + +-- !query 113 +SELECT IF(true, cast(1 as boolean), cast(2 as float)) FROM t +-- !query 113 schema +struct<> +-- !query 113 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS FLOAT)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS FLOAT)))' (boolean and float).; line 1 pos 7 + + +-- !query 114 +SELECT IF(true, cast(1 as boolean), cast(2 as double)) FROM t +-- !query 114 schema +struct<> +-- !query 114 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS DOUBLE)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS DOUBLE)))' (boolean and double).; line 1 pos 7 + + +-- !query 115 +SELECT IF(true, cast(1 as boolean), cast(2 as decimal(10, 0))) FROM t +-- !query 115 schema +struct<> +-- !query 115 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS DECIMAL(10,0))))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS DECIMAL(10,0))))' (boolean and decimal(10,0)).; line 1 pos 7 + + +-- !query 116 +SELECT IF(true, cast(1 as boolean), cast(2 as string)) FROM t +-- !query 116 schema +struct<> +-- !query 116 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS STRING)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS STRING)))' (boolean and string).; line 1 pos 7 + + +-- !query 117 +SELECT IF(true, cast(1 as boolean), cast('2' as binary)) FROM t +-- !query 117 schema +struct<> +-- !query 117 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST('2' AS BINARY)))' (boolean and binary).; line 1 pos 7 + + +-- !query 118 +SELECT IF(true, cast(1 as boolean), cast(2 as boolean)) FROM t +-- !query 118 schema +struct<(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS BOOLEAN))):boolean> +-- !query 118 output +true + + +-- !query 119 +SELECT IF(true, cast(1 as boolean), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 119 schema +struct<> +-- !query 119 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' (boolean and timestamp).; line 1 pos 7 + + +-- !query 120 +SELECT IF(true, cast(1 as boolean), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 120 schema +struct<> +-- !query 120 output +org.apache.spark.sql.AnalysisException 
+cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST('2017-12-11 09:30:00' AS DATE)))' (boolean and date).; line 1 pos 7 + + +-- !query 121 +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as tinyint)) FROM t +-- !query 121 schema +struct<> +-- !query 121 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS TINYINT)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS TINYINT)))' (timestamp and tinyint).; line 1 pos 7 + + +-- !query 122 +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as smallint)) FROM t +-- !query 122 schema +struct<> +-- !query 122 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS SMALLINT)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS SMALLINT)))' (timestamp and smallint).; line 1 pos 7 + + +-- !query 123 +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as int)) FROM t +-- !query 123 schema +struct<> +-- !query 123 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS INT)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS INT)))' (timestamp and int).; line 1 pos 7 + + +-- !query 124 +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as bigint)) FROM t +-- !query 124 schema +struct<> +-- !query 124 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS BIGINT)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS BIGINT)))' (timestamp and bigint).; line 1 pos 7 + + +-- !query 125 +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as float)) FROM t +-- !query 125 schema +struct<> +-- !query 125 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS FLOAT)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS FLOAT)))' (timestamp and float).; line 1 pos 7 + + +-- !query 126 +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as double)) FROM t +-- !query 126 schema +struct<> +-- !query 126 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS DOUBLE)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS DOUBLE)))' (timestamp and double).; line 1 pos 7 + + +-- !query 127 +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as decimal(10, 0))) FROM t +-- !query 127 schema +struct<> +-- !query 127 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS DECIMAL(10,0))))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS DECIMAL(10,0))))' (timestamp and decimal(10,0)).; line 1 pos 7 + + +-- !query 128 +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as string)) FROM t +-- !query 128 schema 
+struct<(IF(true, CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS STRING), CAST(2 AS STRING))):string> +-- !query 128 output +2017-12-12 09:30:00 + + +-- !query 129 +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast('2' as binary)) FROM t +-- !query 129 schema +struct<> +-- !query 129 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST('2' AS BINARY)))' (timestamp and binary).; line 1 pos 7 + + +-- !query 130 +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as boolean)) FROM t +-- !query 130 schema +struct<> +-- !query 130 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS BOOLEAN)))' (timestamp and boolean).; line 1 pos 7 + + +-- !query 131 +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 131 schema +struct<(IF(true, CAST(2017-12-12 09:30:00.0 AS TIMESTAMP), CAST(2017-12-11 09:30:00.0 AS TIMESTAMP))):timestamp> +-- !query 131 output +2017-12-12 09:30:00 + + +-- !query 132 +SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 132 schema +struct<(IF(true, CAST(2017-12-12 09:30:00.0 AS TIMESTAMP), CAST(CAST(2017-12-11 09:30:00 AS DATE) AS TIMESTAMP))):timestamp> +-- !query 132 output +2017-12-12 09:30:00 + + +-- !query 133 +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as tinyint)) FROM t +-- !query 133 schema +struct<> +-- !query 133 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS TINYINT)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS TINYINT)))' (date and tinyint).; line 1 pos 7 + + +-- !query 134 +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as smallint)) FROM t +-- !query 134 schema +struct<> +-- !query 134 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS SMALLINT)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS SMALLINT)))' (date and smallint).; line 1 pos 7 + + +-- !query 135 +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as int)) FROM t +-- !query 135 schema +struct<> +-- !query 135 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS INT)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS INT)))' (date and int).; line 1 pos 7 + + +-- !query 136 +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as bigint)) FROM t +-- !query 136 schema +struct<> +-- !query 136 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS BIGINT)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS BIGINT)))' (date and bigint).; line 1 pos 7 + + +-- !query 137 +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as float)) FROM t +-- !query 137 schema +struct<> +-- !query 137 output 
+org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS FLOAT)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS FLOAT)))' (date and float).; line 1 pos 7 + + +-- !query 138 +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as double)) FROM t +-- !query 138 schema +struct<> +-- !query 138 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS DOUBLE)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS DOUBLE)))' (date and double).; line 1 pos 7 + + +-- !query 139 +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as decimal(10, 0))) FROM t +-- !query 139 schema +struct<> +-- !query 139 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS DECIMAL(10,0))))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS DECIMAL(10,0))))' (date and decimal(10,0)).; line 1 pos 7 + + +-- !query 140 +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as string)) FROM t +-- !query 140 schema +struct<(IF(true, CAST(CAST(2017-12-12 09:30:00 AS DATE) AS STRING), CAST(2 AS STRING))):string> +-- !query 140 output +2017-12-12 + + +-- !query 141 +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast('2' as binary)) FROM t +-- !query 141 schema +struct<> +-- !query 141 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST('2' AS BINARY)))' (date and binary).; line 1 pos 7 + + +-- !query 142 +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as boolean)) FROM t +-- !query 142 schema +struct<> +-- !query 142 output +org.apache.spark.sql.AnalysisException +cannot resolve '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS BOOLEAN)))' (date and boolean).; line 1 pos 7 + + +-- !query 143 +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 143 schema +struct<(IF(true, CAST(CAST(2017-12-12 09:30:00 AS DATE) AS TIMESTAMP), CAST(2017-12-11 09:30:00.0 AS TIMESTAMP))):timestamp> +-- !query 143 output +2017-12-12 00:00:00 + + +-- !query 144 +SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 144 schema +struct<(IF(true, CAST(2017-12-12 09:30:00 AS DATE), CAST(2017-12-11 09:30:00 AS DATE))):date> +-- !query 144 output +2017-12-12 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/implicitTypeCasts.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/implicitTypeCasts.sql.out new file mode 100644 index 0000000000000..44fa48e2697b3 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/implicitTypeCasts.sql.out @@ -0,0 +1,354 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 44 + + +-- !query 0 +CREATE TEMPORARY VIEW t AS SELECT 1 +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +SELECT 1 + '2' FROM t +-- !query 1 schema +struct<(CAST(1 AS DOUBLE) + CAST(2 AS DOUBLE)):double> +-- !query 1 output +3.0 
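Note on the implicitTypeCasts golden file this hunk adds: it documents that a string operand in binary arithmetic is implicitly cast to double on both sides, so even integer-looking string literals produce double results. A minimal spark-sql sketch mirroring queries 1 and 4 of the file (the FROM t view is only needed inside the generated suite):

  SELECT 1 + '2';  -- both operands are cast to double, yielding 3.0
  SELECT 4 / '2';  -- division likewise resolves to double, yielding 2.0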
+ + +-- !query 2 +SELECT 1 - '2' FROM t +-- !query 2 schema +struct<(CAST(1 AS DOUBLE) - CAST(2 AS DOUBLE)):double> +-- !query 2 output +-1.0 + + +-- !query 3 +SELECT 1 * '2' FROM t +-- !query 3 schema +struct<(CAST(1 AS DOUBLE) * CAST(2 AS DOUBLE)):double> +-- !query 3 output +2.0 + + +-- !query 4 +SELECT 4 / '2' FROM t +-- !query 4 schema +struct<(CAST(4 AS DOUBLE) / CAST(CAST(2 AS DOUBLE) AS DOUBLE)):double> +-- !query 4 output +2.0 + + +-- !query 5 +SELECT 1.1 + '2' FROM t +-- !query 5 schema +struct<(CAST(1.1 AS DOUBLE) + CAST(2 AS DOUBLE)):double> +-- !query 5 output +3.1 + + +-- !query 6 +SELECT 1.1 - '2' FROM t +-- !query 6 schema +struct<(CAST(1.1 AS DOUBLE) - CAST(2 AS DOUBLE)):double> +-- !query 6 output +-0.8999999999999999 + + +-- !query 7 +SELECT 1.1 * '2' FROM t +-- !query 7 schema +struct<(CAST(1.1 AS DOUBLE) * CAST(2 AS DOUBLE)):double> +-- !query 7 output +2.2 + + +-- !query 8 +SELECT 4.4 / '2' FROM t +-- !query 8 schema +struct<(CAST(4.4 AS DOUBLE) / CAST(2 AS DOUBLE)):double> +-- !query 8 output +2.2 + + +-- !query 9 +SELECT 1.1 + '2.2' FROM t +-- !query 9 schema +struct<(CAST(1.1 AS DOUBLE) + CAST(2.2 AS DOUBLE)):double> +-- !query 9 output +3.3000000000000003 + + +-- !query 10 +SELECT 1.1 - '2.2' FROM t +-- !query 10 schema +struct<(CAST(1.1 AS DOUBLE) - CAST(2.2 AS DOUBLE)):double> +-- !query 10 output +-1.1 + + +-- !query 11 +SELECT 1.1 * '2.2' FROM t +-- !query 11 schema +struct<(CAST(1.1 AS DOUBLE) * CAST(2.2 AS DOUBLE)):double> +-- !query 11 output +2.4200000000000004 + + +-- !query 12 +SELECT 4.4 / '2.2' FROM t +-- !query 12 schema +struct<(CAST(4.4 AS DOUBLE) / CAST(2.2 AS DOUBLE)):double> +-- !query 12 output +2.0 + + +-- !query 13 +SELECT '$' || cast(1 as smallint) || '$' FROM t +-- !query 13 schema +struct +-- !query 13 output +$1$ + + +-- !query 14 +SELECT '$' || 1 || '$' FROM t +-- !query 14 schema +struct +-- !query 14 output +$1$ + + +-- !query 15 +SELECT '$' || cast(1 as bigint) || '$' FROM t +-- !query 15 schema +struct +-- !query 15 output +$1$ + + +-- !query 16 +SELECT '$' || cast(1.1 as float) || '$' FROM t +-- !query 16 schema +struct +-- !query 16 output +$1.1$ + + +-- !query 17 +SELECT '$' || cast(1.1 as double) || '$' FROM t +-- !query 17 schema +struct +-- !query 17 output +$1.1$ + + +-- !query 18 +SELECT '$' || 1.1 || '$' FROM t +-- !query 18 schema +struct +-- !query 18 output +$1.1$ + + +-- !query 19 +SELECT '$' || cast(1.1 as decimal(8,3)) || '$' FROM t +-- !query 19 schema +struct +-- !query 19 output +$1.100$ + + +-- !query 20 +SELECT '$' || 'abcd' || '$' FROM t +-- !query 20 schema +struct +-- !query 20 output +$abcd$ + + +-- !query 21 +SELECT '$' || date('1996-09-09') || '$' FROM t +-- !query 21 schema +struct +-- !query 21 output +$1996-09-09$ + + +-- !query 22 +SELECT '$' || timestamp('1996-09-09 10:11:12.4' )|| '$' FROM t +-- !query 22 schema +struct +-- !query 22 output +$1996-09-09 10:11:12.4$ + + +-- !query 23 +SELECT length(cast(1 as smallint)) FROM t +-- !query 23 schema +struct +-- !query 23 output +1 + + +-- !query 24 +SELECT length(cast(1 as int)) FROM t +-- !query 24 schema +struct +-- !query 24 output +1 + + +-- !query 25 +SELECT length(cast(1 as bigint)) FROM t +-- !query 25 schema +struct +-- !query 25 output +1 + + +-- !query 26 +SELECT length(cast(1.1 as float)) FROM t +-- !query 26 schema +struct +-- !query 26 output +3 + + +-- !query 27 +SELECT length(cast(1.1 as double)) FROM t +-- !query 27 schema +struct +-- !query 27 output +3 + + +-- !query 28 +SELECT length(1.1) FROM t +-- !query 28 schema +struct +-- !query 28 
output +3 + + +-- !query 29 +SELECT length(cast(1.1 as decimal(8,3))) FROM t +-- !query 29 schema +struct +-- !query 29 output +5 + + +-- !query 30 +SELECT length('four') FROM t +-- !query 30 schema +struct +-- !query 30 output +4 + + +-- !query 31 +SELECT length(date('1996-09-10')) FROM t +-- !query 31 schema +struct +-- !query 31 output +10 + + +-- !query 32 +SELECT length(timestamp('1996-09-10 10:11:12.4')) FROM t +-- !query 32 schema +struct +-- !query 32 output +21 + + +-- !query 33 +SELECT year( '1996-01-10') FROM t +-- !query 33 schema +struct +-- !query 33 output +1996 + + +-- !query 34 +SELECT month( '1996-01-10') FROM t +-- !query 34 schema +struct +-- !query 34 output +1 + + +-- !query 35 +SELECT day( '1996-01-10') FROM t +-- !query 35 schema +struct +-- !query 35 output +10 + + +-- !query 36 +SELECT hour( '10:11:12') FROM t +-- !query 36 schema +struct +-- !query 36 output +10 + + +-- !query 37 +SELECT minute( '10:11:12') FROM t +-- !query 37 schema +struct +-- !query 37 output +11 + + +-- !query 38 +SELECT second( '10:11:12') FROM t +-- !query 38 schema +struct +-- !query 38 output +12 + + +-- !query 39 +select 1 like '%' FROM t +-- !query 39 schema +struct +-- !query 39 output +true + + +-- !query 40 +select date('1996-09-10') like '19%' FROM t +-- !query 40 schema +struct +-- !query 40 output +true + + +-- !query 41 +select '1' like 1 FROM t +-- !query 41 schema +struct<1 LIKE CAST(1 AS STRING):boolean> +-- !query 41 output +true + + +-- !query 42 +select '1 ' like 1 FROM t +-- !query 42 schema +struct<1 LIKE CAST(1 AS STRING):boolean> +-- !query 42 output +false + + +-- !query 43 +select '1996-09-10' like date('1996-09-10') FROM t +-- !query 43 schema +struct<1996-09-10 LIKE CAST(CAST(1996-09-10 AS DATE) AS STRING):boolean> +-- !query 43 output +true diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out new file mode 100644 index 0000000000000..875ccc1341ec4 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out @@ -0,0 +1,2454 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 289 + + +-- !query 0 +CREATE TEMPORARY VIEW t AS SELECT 1 +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +SELECT cast(1 as tinyint) in (cast(1 as tinyint)) FROM t +-- !query 1 schema +struct<(CAST(1 AS TINYINT) IN (CAST(1 AS TINYINT))):boolean> +-- !query 1 output +true + + +-- !query 2 +SELECT cast(1 as tinyint) in (cast(1 as smallint)) FROM t +-- !query 2 schema +struct<(CAST(CAST(1 AS TINYINT) AS SMALLINT) IN (CAST(CAST(1 AS SMALLINT) AS SMALLINT))):boolean> +-- !query 2 output +true + + +-- !query 3 +SELECT cast(1 as tinyint) in (cast(1 as int)) FROM t +-- !query 3 schema +struct<(CAST(CAST(1 AS TINYINT) AS INT) IN (CAST(CAST(1 AS INT) AS INT))):boolean> +-- !query 3 output +true + + +-- !query 4 +SELECT cast(1 as tinyint) in (cast(1 as bigint)) FROM t +-- !query 4 schema +struct<(CAST(CAST(1 AS TINYINT) AS BIGINT) IN (CAST(CAST(1 AS BIGINT) AS BIGINT))):boolean> +-- !query 4 output +true + + +-- !query 5 +SELECT cast(1 as tinyint) in (cast(1 as float)) FROM t +-- !query 5 schema +struct<(CAST(CAST(1 AS TINYINT) AS FLOAT) IN (CAST(CAST(1 AS FLOAT) AS FLOAT))):boolean> +-- !query 5 output +true + + +-- !query 6 +SELECT cast(1 as tinyint) in (cast(1 as double)) FROM t +-- !query 6 schema +struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS 
DOUBLE))):boolean> +-- !query 6 output +true + + +-- !query 7 +SELECT cast(1 as tinyint) in (cast(1 as decimal(10, 0))) FROM t +-- !query 7 schema +struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> +-- !query 7 output +true + + +-- !query 8 +SELECT cast(1 as tinyint) in (cast(1 as string)) FROM t +-- !query 8 schema +struct<(CAST(CAST(1 AS TINYINT) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +-- !query 8 output +true + + +-- !query 9 +SELECT cast(1 as tinyint) in (cast('1' as binary)) FROM t +-- !query 9 schema +struct<> +-- !query 9 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS TINYINT) IN (CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: tinyint != binary; line 1 pos 26 + + +-- !query 10 +SELECT cast(1 as tinyint) in (cast(1 as boolean)) FROM t +-- !query 10 schema +struct<> +-- !query 10 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS TINYINT) IN (CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: tinyint != boolean; line 1 pos 26 + + +-- !query 11 +SELECT cast(1 as tinyint) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 11 schema +struct<> +-- !query 11 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS TINYINT) IN (CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: tinyint != timestamp; line 1 pos 26 + + +-- !query 12 +SELECT cast(1 as tinyint) in (cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 12 schema +struct<> +-- !query 12 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS TINYINT) IN (CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: tinyint != date; line 1 pos 26 + + +-- !query 13 +SELECT cast(1 as smallint) in (cast(1 as tinyint)) FROM t +-- !query 13 schema +struct<(CAST(CAST(1 AS SMALLINT) AS SMALLINT) IN (CAST(CAST(1 AS TINYINT) AS SMALLINT))):boolean> +-- !query 13 output +true + + +-- !query 14 +SELECT cast(1 as smallint) in (cast(1 as smallint)) FROM t +-- !query 14 schema +struct<(CAST(1 AS SMALLINT) IN (CAST(1 AS SMALLINT))):boolean> +-- !query 14 output +true + + +-- !query 15 +SELECT cast(1 as smallint) in (cast(1 as int)) FROM t +-- !query 15 schema +struct<(CAST(CAST(1 AS SMALLINT) AS INT) IN (CAST(CAST(1 AS INT) AS INT))):boolean> +-- !query 15 output +true + + +-- !query 16 +SELECT cast(1 as smallint) in (cast(1 as bigint)) FROM t +-- !query 16 schema +struct<(CAST(CAST(1 AS SMALLINT) AS BIGINT) IN (CAST(CAST(1 AS BIGINT) AS BIGINT))):boolean> +-- !query 16 output +true + + +-- !query 17 +SELECT cast(1 as smallint) in (cast(1 as float)) FROM t +-- !query 17 schema +struct<(CAST(CAST(1 AS SMALLINT) AS FLOAT) IN (CAST(CAST(1 AS FLOAT) AS FLOAT))):boolean> +-- !query 17 output +true + + +-- !query 18 +SELECT cast(1 as smallint) in (cast(1 as double)) FROM t +-- !query 18 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> +-- !query 18 output +true + + +-- !query 19 +SELECT cast(1 as smallint) in (cast(1 as decimal(10, 0))) FROM t +-- !query 19 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> +-- !query 19 output +true + + +-- !query 20 +SELECT cast(1 as smallint) in (cast(1 as string)) FROM t +-- !query 20 schema +struct<(CAST(CAST(1 AS SMALLINT) AS 
STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +-- !query 20 output +true + + +-- !query 21 +SELECT cast(1 as smallint) in (cast('1' as binary)) FROM t +-- !query 21 schema +struct<> +-- !query 21 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS SMALLINT) IN (CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: smallint != binary; line 1 pos 27 + + +-- !query 22 +SELECT cast(1 as smallint) in (cast(1 as boolean)) FROM t +-- !query 22 schema +struct<> +-- !query 22 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS SMALLINT) IN (CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: smallint != boolean; line 1 pos 27 + + +-- !query 23 +SELECT cast(1 as smallint) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 23 schema +struct<> +-- !query 23 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS SMALLINT) IN (CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: smallint != timestamp; line 1 pos 27 + + +-- !query 24 +SELECT cast(1 as smallint) in (cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 24 schema +struct<> +-- !query 24 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS SMALLINT) IN (CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: smallint != date; line 1 pos 27 + + +-- !query 25 +SELECT cast(1 as int) in (cast(1 as tinyint)) FROM t +-- !query 25 schema +struct<(CAST(CAST(1 AS INT) AS INT) IN (CAST(CAST(1 AS TINYINT) AS INT))):boolean> +-- !query 25 output +true + + +-- !query 26 +SELECT cast(1 as int) in (cast(1 as smallint)) FROM t +-- !query 26 schema +struct<(CAST(CAST(1 AS INT) AS INT) IN (CAST(CAST(1 AS SMALLINT) AS INT))):boolean> +-- !query 26 output +true + + +-- !query 27 +SELECT cast(1 as int) in (cast(1 as int)) FROM t +-- !query 27 schema +struct<(CAST(1 AS INT) IN (CAST(1 AS INT))):boolean> +-- !query 27 output +true + + +-- !query 28 +SELECT cast(1 as int) in (cast(1 as bigint)) FROM t +-- !query 28 schema +struct<(CAST(CAST(1 AS INT) AS BIGINT) IN (CAST(CAST(1 AS BIGINT) AS BIGINT))):boolean> +-- !query 28 output +true + + +-- !query 29 +SELECT cast(1 as int) in (cast(1 as float)) FROM t +-- !query 29 schema +struct<(CAST(CAST(1 AS INT) AS FLOAT) IN (CAST(CAST(1 AS FLOAT) AS FLOAT))):boolean> +-- !query 29 output +true + + +-- !query 30 +SELECT cast(1 as int) in (cast(1 as double)) FROM t +-- !query 30 schema +struct<(CAST(CAST(1 AS INT) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> +-- !query 30 output +true + + +-- !query 31 +SELECT cast(1 as int) in (cast(1 as decimal(10, 0))) FROM t +-- !query 31 schema +struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> +-- !query 31 output +true + + +-- !query 32 +SELECT cast(1 as int) in (cast(1 as string)) FROM t +-- !query 32 schema +struct<(CAST(CAST(1 AS INT) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +-- !query 32 output +true + + +-- !query 33 +SELECT cast(1 as int) in (cast('1' as binary)) FROM t +-- !query 33 schema +struct<> +-- !query 33 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS INT) IN (CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: int != binary; line 1 pos 22 + + +-- !query 34 +SELECT cast(1 as int) in (cast(1 as boolean)) FROM t +-- !query 34 schema 
+struct<> +-- !query 34 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS INT) IN (CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: int != boolean; line 1 pos 22 + + +-- !query 35 +SELECT cast(1 as int) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 35 schema +struct<> +-- !query 35 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS INT) IN (CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: int != timestamp; line 1 pos 22 + + +-- !query 36 +SELECT cast(1 as int) in (cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 36 schema +struct<> +-- !query 36 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS INT) IN (CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: int != date; line 1 pos 22 + + +-- !query 37 +SELECT cast(1 as bigint) in (cast(1 as tinyint)) FROM t +-- !query 37 schema +struct<(CAST(CAST(1 AS BIGINT) AS BIGINT) IN (CAST(CAST(1 AS TINYINT) AS BIGINT))):boolean> +-- !query 37 output +true + + +-- !query 38 +SELECT cast(1 as bigint) in (cast(1 as smallint)) FROM t +-- !query 38 schema +struct<(CAST(CAST(1 AS BIGINT) AS BIGINT) IN (CAST(CAST(1 AS SMALLINT) AS BIGINT))):boolean> +-- !query 38 output +true + + +-- !query 39 +SELECT cast(1 as bigint) in (cast(1 as int)) FROM t +-- !query 39 schema +struct<(CAST(CAST(1 AS BIGINT) AS BIGINT) IN (CAST(CAST(1 AS INT) AS BIGINT))):boolean> +-- !query 39 output +true + + +-- !query 40 +SELECT cast(1 as bigint) in (cast(1 as bigint)) FROM t +-- !query 40 schema +struct<(CAST(1 AS BIGINT) IN (CAST(1 AS BIGINT))):boolean> +-- !query 40 output +true + + +-- !query 41 +SELECT cast(1 as bigint) in (cast(1 as float)) FROM t +-- !query 41 schema +struct<(CAST(CAST(1 AS BIGINT) AS FLOAT) IN (CAST(CAST(1 AS FLOAT) AS FLOAT))):boolean> +-- !query 41 output +true + + +-- !query 42 +SELECT cast(1 as bigint) in (cast(1 as double)) FROM t +-- !query 42 schema +struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> +-- !query 42 output +true + + +-- !query 43 +SELECT cast(1 as bigint) in (cast(1 as decimal(10, 0))) FROM t +-- !query 43 schema +struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)))):boolean> +-- !query 43 output +true + + +-- !query 44 +SELECT cast(1 as bigint) in (cast(1 as string)) FROM t +-- !query 44 schema +struct<(CAST(CAST(1 AS BIGINT) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +-- !query 44 output +true + + +-- !query 45 +SELECT cast(1 as bigint) in (cast('1' as binary)) FROM t +-- !query 45 schema +struct<> +-- !query 45 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BIGINT) IN (CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: bigint != binary; line 1 pos 25 + + +-- !query 46 +SELECT cast(1 as bigint) in (cast(1 as boolean)) FROM t +-- !query 46 schema +struct<> +-- !query 46 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BIGINT) IN (CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: bigint != boolean; line 1 pos 25 + + +-- !query 47 +SELECT cast(1 as bigint) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 47 schema +struct<> +-- !query 47 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BIGINT) IN (CAST('2017-12-11 
09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: bigint != timestamp; line 1 pos 25 + + +-- !query 48 +SELECT cast(1 as bigint) in (cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 48 schema +struct<> +-- !query 48 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BIGINT) IN (CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: bigint != date; line 1 pos 25 + + +-- !query 49 +SELECT cast(1 as float) in (cast(1 as tinyint)) FROM t +-- !query 49 schema +struct<(CAST(CAST(1 AS FLOAT) AS FLOAT) IN (CAST(CAST(1 AS TINYINT) AS FLOAT))):boolean> +-- !query 49 output +true + + +-- !query 50 +SELECT cast(1 as float) in (cast(1 as smallint)) FROM t +-- !query 50 schema +struct<(CAST(CAST(1 AS FLOAT) AS FLOAT) IN (CAST(CAST(1 AS SMALLINT) AS FLOAT))):boolean> +-- !query 50 output +true + + +-- !query 51 +SELECT cast(1 as float) in (cast(1 as int)) FROM t +-- !query 51 schema +struct<(CAST(CAST(1 AS FLOAT) AS FLOAT) IN (CAST(CAST(1 AS INT) AS FLOAT))):boolean> +-- !query 51 output +true + + +-- !query 52 +SELECT cast(1 as float) in (cast(1 as bigint)) FROM t +-- !query 52 schema +struct<(CAST(CAST(1 AS FLOAT) AS FLOAT) IN (CAST(CAST(1 AS BIGINT) AS FLOAT))):boolean> +-- !query 52 output +true + + +-- !query 53 +SELECT cast(1 as float) in (cast(1 as float)) FROM t +-- !query 53 schema +struct<(CAST(1 AS FLOAT) IN (CAST(1 AS FLOAT))):boolean> +-- !query 53 output +true + + +-- !query 54 +SELECT cast(1 as float) in (cast(1 as double)) FROM t +-- !query 54 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> +-- !query 54 output +true + + +-- !query 55 +SELECT cast(1 as float) in (cast(1 as decimal(10, 0))) FROM t +-- !query 55 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> +-- !query 55 output +true + + +-- !query 56 +SELECT cast(1 as float) in (cast(1 as string)) FROM t +-- !query 56 schema +struct<(CAST(CAST(1 AS FLOAT) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +-- !query 56 output +false + + +-- !query 57 +SELECT cast(1 as float) in (cast('1' as binary)) FROM t +-- !query 57 schema +struct<> +-- !query 57 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS FLOAT) IN (CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: float != binary; line 1 pos 24 + + +-- !query 58 +SELECT cast(1 as float) in (cast(1 as boolean)) FROM t +-- !query 58 schema +struct<> +-- !query 58 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS FLOAT) IN (CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: float != boolean; line 1 pos 24 + + +-- !query 59 +SELECT cast(1 as float) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 59 schema +struct<> +-- !query 59 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS FLOAT) IN (CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: float != timestamp; line 1 pos 24 + + +-- !query 60 +SELECT cast(1 as float) in (cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 60 schema +struct<> +-- !query 60 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS FLOAT) IN (CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: float != date; line 1 pos 24 + + +-- !query 61 
+SELECT cast(1 as double) in (cast(1 as tinyint)) FROM t +-- !query 61 schema +struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS TINYINT) AS DOUBLE))):boolean> +-- !query 61 output +true + + +-- !query 62 +SELECT cast(1 as double) in (cast(1 as smallint)) FROM t +-- !query 62 schema +struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS SMALLINT) AS DOUBLE))):boolean> +-- !query 62 output +true + + +-- !query 63 +SELECT cast(1 as double) in (cast(1 as int)) FROM t +-- !query 63 schema +struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS INT) AS DOUBLE))):boolean> +-- !query 63 output +true + + +-- !query 64 +SELECT cast(1 as double) in (cast(1 as bigint)) FROM t +-- !query 64 schema +struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS BIGINT) AS DOUBLE))):boolean> +-- !query 64 output +true + + +-- !query 65 +SELECT cast(1 as double) in (cast(1 as float)) FROM t +-- !query 65 schema +struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS FLOAT) AS DOUBLE))):boolean> +-- !query 65 output +true + + +-- !query 66 +SELECT cast(1 as double) in (cast(1 as double)) FROM t +-- !query 66 schema +struct<(CAST(1 AS DOUBLE) IN (CAST(1 AS DOUBLE))):boolean> +-- !query 66 output +true + + +-- !query 67 +SELECT cast(1 as double) in (cast(1 as decimal(10, 0))) FROM t +-- !query 67 schema +struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> +-- !query 67 output +true + + +-- !query 68 +SELECT cast(1 as double) in (cast(1 as string)) FROM t +-- !query 68 schema +struct<(CAST(CAST(1 AS DOUBLE) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +-- !query 68 output +false + + +-- !query 69 +SELECT cast(1 as double) in (cast('1' as binary)) FROM t +-- !query 69 schema +struct<> +-- !query 69 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DOUBLE) IN (CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: double != binary; line 1 pos 25 + + +-- !query 70 +SELECT cast(1 as double) in (cast(1 as boolean)) FROM t +-- !query 70 schema +struct<> +-- !query 70 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DOUBLE) IN (CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: double != boolean; line 1 pos 25 + + +-- !query 71 +SELECT cast(1 as double) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 71 schema +struct<> +-- !query 71 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DOUBLE) IN (CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: double != timestamp; line 1 pos 25 + + +-- !query 72 +SELECT cast(1 as double) in (cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 72 schema +struct<> +-- !query 72 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DOUBLE) IN (CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: double != date; line 1 pos 25 + + +-- !query 73 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as tinyint)) FROM t +-- !query 73 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS TINYINT) AS DECIMAL(10,0)))):boolean> +-- !query 73 output +true + + +-- !query 74 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as smallint)) FROM t +-- !query 74 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS SMALLINT) AS DECIMAL(10,0)))):boolean> +-- !query 74 output 
+true + + +-- !query 75 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as int)) FROM t +-- !query 75 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS INT) AS DECIMAL(10,0)))):boolean> +-- !query 75 output +true + + +-- !query 76 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as bigint)) FROM t +-- !query 76 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) IN (CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)))):boolean> +-- !query 76 output +true + + +-- !query 77 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as float)) FROM t +-- !query 77 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) IN (CAST(CAST(1 AS FLOAT) AS DOUBLE))):boolean> +-- !query 77 output +true + + +-- !query 78 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as double)) FROM t +-- !query 78 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> +-- !query 78 output +true + + +-- !query 79 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0))) FROM t +-- !query 79 schema +struct<(CAST(1 AS DECIMAL(10,0)) IN (CAST(1 AS DECIMAL(10,0)))):boolean> +-- !query 79 output +true + + +-- !query 80 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as string)) FROM t +-- !query 80 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> +-- !query 80 output +true + + +-- !query 81 +SELECT cast(1 as decimal(10, 0)) in (cast('1' as binary)) FROM t +-- !query 81 schema +struct<> +-- !query 81 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) IN (CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: decimal(10,0) != binary; line 1 pos 33 + + +-- !query 82 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as boolean)) FROM t +-- !query 82 schema +struct<> +-- !query 82 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) IN (CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: decimal(10,0) != boolean; line 1 pos 33 + + +-- !query 83 +SELECT cast(1 as decimal(10, 0)) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 83 schema +struct<> +-- !query 83 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) IN (CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: decimal(10,0) != timestamp; line 1 pos 33 + + +-- !query 84 +SELECT cast(1 as decimal(10, 0)) in (cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 84 schema +struct<> +-- !query 84 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) IN (CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: decimal(10,0) != date; line 1 pos 33 + + +-- !query 85 +SELECT cast(1 as string) in (cast(1 as tinyint)) FROM t +-- !query 85 schema +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS TINYINT) AS STRING))):boolean> +-- !query 85 output +true + + +-- !query 86 +SELECT cast(1 as string) in (cast(1 as smallint)) FROM t +-- !query 86 schema +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS SMALLINT) AS STRING))):boolean> +-- !query 86 output +true + + +-- !query 87 +SELECT cast(1 as string) in (cast(1 as int)) FROM t +-- !query 87 schema +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS INT) AS STRING))):boolean> +-- !query 87 output +true + + +-- !query 88 +SELECT cast(1 as string) in 
(cast(1 as bigint)) FROM t +-- !query 88 schema +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS BIGINT) AS STRING))):boolean> +-- !query 88 output +true + + +-- !query 89 +SELECT cast(1 as string) in (cast(1 as float)) FROM t +-- !query 89 schema +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS FLOAT) AS STRING))):boolean> +-- !query 89 output +false + + +-- !query 90 +SELECT cast(1 as string) in (cast(1 as double)) FROM t +-- !query 90 schema +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS DOUBLE) AS STRING))):boolean> +-- !query 90 output +false + + +-- !query 91 +SELECT cast(1 as string) in (cast(1 as decimal(10, 0))) FROM t +-- !query 91 schema +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS STRING))):boolean> +-- !query 91 output +true + + +-- !query 92 +SELECT cast(1 as string) in (cast(1 as string)) FROM t +-- !query 92 schema +struct<(CAST(1 AS STRING) IN (CAST(1 AS STRING))):boolean> +-- !query 92 output +true + + +-- !query 93 +SELECT cast(1 as string) in (cast('1' as binary)) FROM t +-- !query 93 schema +struct<> +-- !query 93 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS STRING) IN (CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: string != binary; line 1 pos 25 + + +-- !query 94 +SELECT cast(1 as string) in (cast(1 as boolean)) FROM t +-- !query 94 schema +struct<> +-- !query 94 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS STRING) IN (CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: string != boolean; line 1 pos 25 + + +-- !query 95 +SELECT cast(1 as string) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 95 schema +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING))):boolean> +-- !query 95 output +false + + +-- !query 96 +SELECT cast(1 as string) in (cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 96 schema +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING))):boolean> +-- !query 96 output +false + + +-- !query 97 +SELECT cast('1' as binary) in (cast(1 as tinyint)) FROM t +-- !query 97 schema +struct<> +-- !query 97 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS TINYINT)))' due to data type mismatch: Arguments must be same type but were: binary != tinyint; line 1 pos 27 + + +-- !query 98 +SELECT cast('1' as binary) in (cast(1 as smallint)) FROM t +-- !query 98 schema +struct<> +-- !query 98 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS SMALLINT)))' due to data type mismatch: Arguments must be same type but were: binary != smallint; line 1 pos 27 + + +-- !query 99 +SELECT cast('1' as binary) in (cast(1 as int)) FROM t +-- !query 99 schema +struct<> +-- !query 99 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS INT)))' due to data type mismatch: Arguments must be same type but were: binary != int; line 1 pos 27 + + +-- !query 100 +SELECT cast('1' as binary) in (cast(1 as bigint)) FROM t +-- !query 100 schema +struct<> +-- !query 100 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS BIGINT)))' due to data type mismatch: Arguments must be same type but were: binary != bigint; line 1 pos 27 + + +-- !query 101 +SELECT cast('1' as binary) in (cast(1 as 
float)) FROM t +-- !query 101 schema +struct<> +-- !query 101 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS FLOAT)))' due to data type mismatch: Arguments must be same type but were: binary != float; line 1 pos 27 + + +-- !query 102 +SELECT cast('1' as binary) in (cast(1 as double)) FROM t +-- !query 102 schema +struct<> +-- !query 102 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS DOUBLE)))' due to data type mismatch: Arguments must be same type but were: binary != double; line 1 pos 27 + + +-- !query 103 +SELECT cast('1' as binary) in (cast(1 as decimal(10, 0))) FROM t +-- !query 103 schema +struct<> +-- !query 103 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS DECIMAL(10,0))))' due to data type mismatch: Arguments must be same type but were: binary != decimal(10,0); line 1 pos 27 + + +-- !query 104 +SELECT cast('1' as binary) in (cast(1 as string)) FROM t +-- !query 104 schema +struct<> +-- !query 104 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS STRING)))' due to data type mismatch: Arguments must be same type but were: binary != string; line 1 pos 27 + + +-- !query 105 +SELECT cast('1' as binary) in (cast('1' as binary)) FROM t +-- !query 105 schema +struct<(CAST(1 AS BINARY) IN (CAST(1 AS BINARY))):boolean> +-- !query 105 output +true + + +-- !query 106 +SELECT cast('1' as binary) in (cast(1 as boolean)) FROM t +-- !query 106 schema +struct<> +-- !query 106 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: binary != boolean; line 1 pos 27 + + +-- !query 107 +SELECT cast('1' as binary) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 107 schema +struct<> +-- !query 107 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: binary != timestamp; line 1 pos 27 + + +-- !query 108 +SELECT cast('1' as binary) in (cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 108 schema +struct<> +-- !query 108 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: binary != date; line 1 pos 27 + + +-- !query 109 +SELECT true in (cast(1 as tinyint)) FROM t +-- !query 109 schema +struct<> +-- !query 109 output +org.apache.spark.sql.AnalysisException +cannot resolve '(true IN (CAST(1 AS TINYINT)))' due to data type mismatch: Arguments must be same type but were: boolean != tinyint; line 1 pos 12 + + +-- !query 110 +SELECT true in (cast(1 as smallint)) FROM t +-- !query 110 schema +struct<> +-- !query 110 output +org.apache.spark.sql.AnalysisException +cannot resolve '(true IN (CAST(1 AS SMALLINT)))' due to data type mismatch: Arguments must be same type but were: boolean != smallint; line 1 pos 12 + + +-- !query 111 +SELECT true in (cast(1 as int)) FROM t +-- !query 111 schema +struct<> +-- !query 111 output +org.apache.spark.sql.AnalysisException +cannot resolve '(true IN (CAST(1 AS INT)))' due to data type mismatch: Arguments must be same type but were: boolean != int; line 1 pos 12 + + +-- !query 112 +SELECT true in (cast(1 as bigint)) FROM t +-- !query 112 schema +struct<> 
+-- !query 112 output +org.apache.spark.sql.AnalysisException +cannot resolve '(true IN (CAST(1 AS BIGINT)))' due to data type mismatch: Arguments must be same type but were: boolean != bigint; line 1 pos 12 + + +-- !query 113 +SELECT true in (cast(1 as float)) FROM t +-- !query 113 schema +struct<> +-- !query 113 output +org.apache.spark.sql.AnalysisException +cannot resolve '(true IN (CAST(1 AS FLOAT)))' due to data type mismatch: Arguments must be same type but were: boolean != float; line 1 pos 12 + + +-- !query 114 +SELECT true in (cast(1 as double)) FROM t +-- !query 114 schema +struct<> +-- !query 114 output +org.apache.spark.sql.AnalysisException +cannot resolve '(true IN (CAST(1 AS DOUBLE)))' due to data type mismatch: Arguments must be same type but were: boolean != double; line 1 pos 12 + + +-- !query 115 +SELECT true in (cast(1 as decimal(10, 0))) FROM t +-- !query 115 schema +struct<> +-- !query 115 output +org.apache.spark.sql.AnalysisException +cannot resolve '(true IN (CAST(1 AS DECIMAL(10,0))))' due to data type mismatch: Arguments must be same type but were: boolean != decimal(10,0); line 1 pos 12 + + +-- !query 116 +SELECT true in (cast(1 as string)) FROM t +-- !query 116 schema +struct<> +-- !query 116 output +org.apache.spark.sql.AnalysisException +cannot resolve '(true IN (CAST(1 AS STRING)))' due to data type mismatch: Arguments must be same type but were: boolean != string; line 1 pos 12 + + +-- !query 117 +SELECT true in (cast('1' as binary)) FROM t +-- !query 117 schema +struct<> +-- !query 117 output +org.apache.spark.sql.AnalysisException +cannot resolve '(true IN (CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: boolean != binary; line 1 pos 12 + + +-- !query 118 +SELECT true in (cast(1 as boolean)) FROM t +-- !query 118 schema +struct<(true IN (CAST(1 AS BOOLEAN))):boolean> +-- !query 118 output +true + + +-- !query 119 +SELECT true in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 119 schema +struct<> +-- !query 119 output +org.apache.spark.sql.AnalysisException +cannot resolve '(true IN (CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: boolean != timestamp; line 1 pos 12 + + +-- !query 120 +SELECT true in (cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 120 schema +struct<> +-- !query 120 output +org.apache.spark.sql.AnalysisException +cannot resolve '(true IN (CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: boolean != date; line 1 pos 12 + + +-- !query 121 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as tinyint)) FROM t +-- !query 121 schema +struct<> +-- !query 121 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST(2 AS TINYINT)))' due to data type mismatch: Arguments must be same type but were: timestamp != tinyint; line 1 pos 50 + + +-- !query 122 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as smallint)) FROM t +-- !query 122 schema +struct<> +-- !query 122 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST(2 AS SMALLINT)))' due to data type mismatch: Arguments must be same type but were: timestamp != smallint; line 1 pos 50 + + +-- !query 123 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as int)) FROM t +-- !query 123 schema +struct<> +-- !query 123 output +org.apache.spark.sql.AnalysisException 
+cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST(2 AS INT)))' due to data type mismatch: Arguments must be same type but were: timestamp != int; line 1 pos 50 + + +-- !query 124 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as bigint)) FROM t +-- !query 124 schema +struct<> +-- !query 124 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST(2 AS BIGINT)))' due to data type mismatch: Arguments must be same type but were: timestamp != bigint; line 1 pos 50 + + +-- !query 125 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as float)) FROM t +-- !query 125 schema +struct<> +-- !query 125 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST(2 AS FLOAT)))' due to data type mismatch: Arguments must be same type but were: timestamp != float; line 1 pos 50 + + +-- !query 126 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as double)) FROM t +-- !query 126 schema +struct<> +-- !query 126 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST(2 AS DOUBLE)))' due to data type mismatch: Arguments must be same type but were: timestamp != double; line 1 pos 50 + + +-- !query 127 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as decimal(10, 0))) FROM t +-- !query 127 schema +struct<> +-- !query 127 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST(2 AS DECIMAL(10,0))))' due to data type mismatch: Arguments must be same type but were: timestamp != decimal(10,0); line 1 pos 50 + + +-- !query 128 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as string)) FROM t +-- !query 128 schema +struct<(CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS STRING) IN (CAST(CAST(2 AS STRING) AS STRING))):boolean> +-- !query 128 output +false + + +-- !query 129 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2' as binary)) FROM t +-- !query 129 schema +struct<> +-- !query 129 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: timestamp != binary; line 1 pos 50 + + +-- !query 130 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as boolean)) FROM t +-- !query 130 schema +struct<> +-- !query 130 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST(2 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: timestamp != boolean; line 1 pos 50 + + +-- !query 131 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 131 schema +struct<(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) IN (CAST(2017-12-11 09:30:00.0 AS TIMESTAMP))):boolean> +-- !query 131 output +false + + +-- !query 132 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 132 schema +struct<(CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS TIMESTAMP) IN (CAST(CAST(2017-12-11 09:30:00 AS DATE) AS TIMESTAMP))):boolean> +-- !query 132 output +false + + +-- !query 133 +SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as tinyint)) FROM t +-- !query 133 schema +struct<> +-- !query 133 output +org.apache.spark.sql.AnalysisException +cannot resolve 
'(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST(2 AS TINYINT)))' due to data type mismatch: Arguments must be same type but were: date != tinyint; line 1 pos 43 + + +-- !query 134 +SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as smallint)) FROM t +-- !query 134 schema +struct<> +-- !query 134 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST(2 AS SMALLINT)))' due to data type mismatch: Arguments must be same type but were: date != smallint; line 1 pos 43 + + +-- !query 135 +SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as int)) FROM t +-- !query 135 schema +struct<> +-- !query 135 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST(2 AS INT)))' due to data type mismatch: Arguments must be same type but were: date != int; line 1 pos 43 + + +-- !query 136 +SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as bigint)) FROM t +-- !query 136 schema +struct<> +-- !query 136 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST(2 AS BIGINT)))' due to data type mismatch: Arguments must be same type but were: date != bigint; line 1 pos 43 + + +-- !query 137 +SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as float)) FROM t +-- !query 137 schema +struct<> +-- !query 137 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST(2 AS FLOAT)))' due to data type mismatch: Arguments must be same type but were: date != float; line 1 pos 43 + + +-- !query 138 +SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as double)) FROM t +-- !query 138 schema +struct<> +-- !query 138 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST(2 AS DOUBLE)))' due to data type mismatch: Arguments must be same type but were: date != double; line 1 pos 43 + + +-- !query 139 +SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as decimal(10, 0))) FROM t +-- !query 139 schema +struct<> +-- !query 139 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST(2 AS DECIMAL(10,0))))' due to data type mismatch: Arguments must be same type but were: date != decimal(10,0); line 1 pos 43 + + +-- !query 140 +SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as string)) FROM t +-- !query 140 schema +struct<(CAST(CAST(2017-12-12 09:30:00 AS DATE) AS STRING) IN (CAST(CAST(2 AS STRING) AS STRING))):boolean> +-- !query 140 output +false + + +-- !query 141 +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2' as binary)) FROM t +-- !query 141 schema +struct<> +-- !query 141 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: date != binary; line 1 pos 43 + + +-- !query 142 +SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as boolean)) FROM t +-- !query 142 schema +struct<> +-- !query 142 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST(2 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: date != boolean; line 1 pos 43 + + +-- !query 143 +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 143 schema +struct<(CAST(CAST(2017-12-12 09:30:00 AS DATE) AS TIMESTAMP) IN (CAST(CAST(2017-12-11 
09:30:00.0 AS TIMESTAMP) AS TIMESTAMP))):boolean> +-- !query 143 output +false + + +-- !query 144 +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 144 schema +struct<(CAST(2017-12-12 09:30:00 AS DATE) IN (CAST(2017-12-11 09:30:00 AS DATE))):boolean> +-- !query 144 output +false + + +-- !query 145 +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as tinyint)) FROM t +-- !query 145 schema +struct<(CAST(1 AS TINYINT) IN (CAST(1 AS TINYINT), CAST(1 AS TINYINT))):boolean> +-- !query 145 output +true + + +-- !query 146 +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as smallint)) FROM t +-- !query 146 schema +struct<(CAST(CAST(1 AS TINYINT) AS SMALLINT) IN (CAST(CAST(1 AS TINYINT) AS SMALLINT), CAST(CAST(1 AS SMALLINT) AS SMALLINT))):boolean> +-- !query 146 output +true + + +-- !query 147 +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as int)) FROM t +-- !query 147 schema +struct<(CAST(CAST(1 AS TINYINT) AS INT) IN (CAST(CAST(1 AS TINYINT) AS INT), CAST(CAST(1 AS INT) AS INT))):boolean> +-- !query 147 output +true + + +-- !query 148 +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as bigint)) FROM t +-- !query 148 schema +struct<(CAST(CAST(1 AS TINYINT) AS BIGINT) IN (CAST(CAST(1 AS TINYINT) AS BIGINT), CAST(CAST(1 AS BIGINT) AS BIGINT))):boolean> +-- !query 148 output +true + + +-- !query 149 +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as float)) FROM t +-- !query 149 schema +struct<(CAST(CAST(1 AS TINYINT) AS FLOAT) IN (CAST(CAST(1 AS TINYINT) AS FLOAT), CAST(CAST(1 AS FLOAT) AS FLOAT))):boolean> +-- !query 149 output +true + + +-- !query 150 +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as double)) FROM t +-- !query 150 schema +struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) IN (CAST(CAST(1 AS TINYINT) AS DOUBLE), CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> +-- !query 150 output +true + + +-- !query 151 +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as decimal(10, 0))) FROM t +-- !query 151 schema +struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS TINYINT) AS DECIMAL(10,0)), CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> +-- !query 151 output +true + + +-- !query 152 +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as string)) FROM t +-- !query 152 schema +struct<(CAST(CAST(1 AS TINYINT) AS STRING) IN (CAST(CAST(1 AS TINYINT) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +-- !query 152 output +true + + +-- !query 153 +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast('1' as binary)) FROM t +-- !query 153 schema +struct<> +-- !query 153 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS TINYINT) IN (CAST(1 AS TINYINT), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: tinyint != binary; line 1 pos 26 + + +-- !query 154 +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as boolean)) FROM t +-- !query 154 schema +struct<> +-- !query 154 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS TINYINT) IN (CAST(1 AS TINYINT), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: tinyint != boolean; line 1 pos 26 + + +-- !query 155 +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 155 schema +struct<> +-- !query 155 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS TINYINT) IN (CAST(1 AS TINYINT), CAST('2017-12-11 
09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: tinyint != timestamp; line 1 pos 26 + + +-- !query 156 +SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 156 schema +struct<> +-- !query 156 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS TINYINT) IN (CAST(1 AS TINYINT), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: tinyint != date; line 1 pos 26 + + +-- !query 157 +SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as tinyint)) FROM t +-- !query 157 schema +struct<(CAST(CAST(1 AS SMALLINT) AS SMALLINT) IN (CAST(CAST(1 AS SMALLINT) AS SMALLINT), CAST(CAST(1 AS TINYINT) AS SMALLINT))):boolean> +-- !query 157 output +true + + +-- !query 158 +SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as smallint)) FROM t +-- !query 158 schema +struct<(CAST(1 AS SMALLINT) IN (CAST(1 AS SMALLINT), CAST(1 AS SMALLINT))):boolean> +-- !query 158 output +true + + +-- !query 159 +SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as int)) FROM t +-- !query 159 schema +struct<(CAST(CAST(1 AS SMALLINT) AS INT) IN (CAST(CAST(1 AS SMALLINT) AS INT), CAST(CAST(1 AS INT) AS INT))):boolean> +-- !query 159 output +true + + +-- !query 160 +SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as bigint)) FROM t +-- !query 160 schema +struct<(CAST(CAST(1 AS SMALLINT) AS BIGINT) IN (CAST(CAST(1 AS SMALLINT) AS BIGINT), CAST(CAST(1 AS BIGINT) AS BIGINT))):boolean> +-- !query 160 output +true + + +-- !query 161 +SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as float)) FROM t +-- !query 161 schema +struct<(CAST(CAST(1 AS SMALLINT) AS FLOAT) IN (CAST(CAST(1 AS SMALLINT) AS FLOAT), CAST(CAST(1 AS FLOAT) AS FLOAT))):boolean> +-- !query 161 output +true + + +-- !query 162 +SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as double)) FROM t +-- !query 162 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) IN (CAST(CAST(1 AS SMALLINT) AS DOUBLE), CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> +-- !query 162 output +true + + +-- !query 163 +SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as decimal(10, 0))) FROM t +-- !query 163 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS SMALLINT) AS DECIMAL(10,0)), CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> +-- !query 163 output +true + + +-- !query 164 +SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as string)) FROM t +-- !query 164 schema +struct<(CAST(CAST(1 AS SMALLINT) AS STRING) IN (CAST(CAST(1 AS SMALLINT) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +-- !query 164 output +true + + +-- !query 165 +SELECT cast(1 as smallint) in (cast(1 as smallint), cast('1' as binary)) FROM t +-- !query 165 schema +struct<> +-- !query 165 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS SMALLINT) IN (CAST(1 AS SMALLINT), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: smallint != binary; line 1 pos 27 + + +-- !query 166 +SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as boolean)) FROM t +-- !query 166 schema +struct<> +-- !query 166 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS SMALLINT) IN (CAST(1 AS SMALLINT), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: smallint != boolean; line 1 pos 27 + + +-- !query 167 +SELECT cast(1 as smallint) in 
(cast(1 as smallint), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 167 schema +struct<> +-- !query 167 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS SMALLINT) IN (CAST(1 AS SMALLINT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: smallint != timestamp; line 1 pos 27 + + +-- !query 168 +SELECT cast(1 as smallint) in (cast(1 as smallint), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 168 schema +struct<> +-- !query 168 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS SMALLINT) IN (CAST(1 AS SMALLINT), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: smallint != date; line 1 pos 27 + + +-- !query 169 +SELECT cast(1 as int) in (cast(1 as int), cast(1 as tinyint)) FROM t +-- !query 169 schema +struct<(CAST(CAST(1 AS INT) AS INT) IN (CAST(CAST(1 AS INT) AS INT), CAST(CAST(1 AS TINYINT) AS INT))):boolean> +-- !query 169 output +true + + +-- !query 170 +SELECT cast(1 as int) in (cast(1 as int), cast(1 as smallint)) FROM t +-- !query 170 schema +struct<(CAST(CAST(1 AS INT) AS INT) IN (CAST(CAST(1 AS INT) AS INT), CAST(CAST(1 AS SMALLINT) AS INT))):boolean> +-- !query 170 output +true + + +-- !query 171 +SELECT cast(1 as int) in (cast(1 as int), cast(1 as int)) FROM t +-- !query 171 schema +struct<(CAST(1 AS INT) IN (CAST(1 AS INT), CAST(1 AS INT))):boolean> +-- !query 171 output +true + + +-- !query 172 +SELECT cast(1 as int) in (cast(1 as int), cast(1 as bigint)) FROM t +-- !query 172 schema +struct<(CAST(CAST(1 AS INT) AS BIGINT) IN (CAST(CAST(1 AS INT) AS BIGINT), CAST(CAST(1 AS BIGINT) AS BIGINT))):boolean> +-- !query 172 output +true + + +-- !query 173 +SELECT cast(1 as int) in (cast(1 as int), cast(1 as float)) FROM t +-- !query 173 schema +struct<(CAST(CAST(1 AS INT) AS FLOAT) IN (CAST(CAST(1 AS INT) AS FLOAT), CAST(CAST(1 AS FLOAT) AS FLOAT))):boolean> +-- !query 173 output +true + + +-- !query 174 +SELECT cast(1 as int) in (cast(1 as int), cast(1 as double)) FROM t +-- !query 174 schema +struct<(CAST(CAST(1 AS INT) AS DOUBLE) IN (CAST(CAST(1 AS INT) AS DOUBLE), CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> +-- !query 174 output +true + + +-- !query 175 +SELECT cast(1 as int) in (cast(1 as int), cast(1 as decimal(10, 0))) FROM t +-- !query 175 schema +struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS INT) AS DECIMAL(10,0)), CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> +-- !query 175 output +true + + +-- !query 176 +SELECT cast(1 as int) in (cast(1 as int), cast(1 as string)) FROM t +-- !query 176 schema +struct<(CAST(CAST(1 AS INT) AS STRING) IN (CAST(CAST(1 AS INT) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +-- !query 176 output +true + + +-- !query 177 +SELECT cast(1 as int) in (cast(1 as int), cast('1' as binary)) FROM t +-- !query 177 schema +struct<> +-- !query 177 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS INT) IN (CAST(1 AS INT), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: int != binary; line 1 pos 22 + + +-- !query 178 +SELECT cast(1 as int) in (cast(1 as int), cast(1 as boolean)) FROM t +-- !query 178 schema +struct<> +-- !query 178 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS INT) IN (CAST(1 AS INT), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: int != boolean; line 1 pos 22 + + +-- !query 179 
+SELECT cast(1 as int) in (cast(1 as int), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 179 schema +struct<> +-- !query 179 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS INT) IN (CAST(1 AS INT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: int != timestamp; line 1 pos 22 + + +-- !query 180 +SELECT cast(1 as int) in (cast(1 as int), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 180 schema +struct<> +-- !query 180 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS INT) IN (CAST(1 AS INT), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: int != date; line 1 pos 22 + + +-- !query 181 +SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as tinyint)) FROM t +-- !query 181 schema +struct<(CAST(CAST(1 AS BIGINT) AS BIGINT) IN (CAST(CAST(1 AS BIGINT) AS BIGINT), CAST(CAST(1 AS TINYINT) AS BIGINT))):boolean> +-- !query 181 output +true + + +-- !query 182 +SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as smallint)) FROM t +-- !query 182 schema +struct<(CAST(CAST(1 AS BIGINT) AS BIGINT) IN (CAST(CAST(1 AS BIGINT) AS BIGINT), CAST(CAST(1 AS SMALLINT) AS BIGINT))):boolean> +-- !query 182 output +true + + +-- !query 183 +SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as int)) FROM t +-- !query 183 schema +struct<(CAST(CAST(1 AS BIGINT) AS BIGINT) IN (CAST(CAST(1 AS BIGINT) AS BIGINT), CAST(CAST(1 AS INT) AS BIGINT))):boolean> +-- !query 183 output +true + + +-- !query 184 +SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as bigint)) FROM t +-- !query 184 schema +struct<(CAST(1 AS BIGINT) IN (CAST(1 AS BIGINT), CAST(1 AS BIGINT))):boolean> +-- !query 184 output +true + + +-- !query 185 +SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as float)) FROM t +-- !query 185 schema +struct<(CAST(CAST(1 AS BIGINT) AS FLOAT) IN (CAST(CAST(1 AS BIGINT) AS FLOAT), CAST(CAST(1 AS FLOAT) AS FLOAT))):boolean> +-- !query 185 output +true + + +-- !query 186 +SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as double)) FROM t +-- !query 186 schema +struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) IN (CAST(CAST(1 AS BIGINT) AS DOUBLE), CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> +-- !query 186 output +true + + +-- !query 187 +SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as decimal(10, 0))) FROM t +-- !query 187 schema +struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) IN (CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)), CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)))):boolean> +-- !query 187 output +true + + +-- !query 188 +SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as string)) FROM t +-- !query 188 schema +struct<(CAST(CAST(1 AS BIGINT) AS STRING) IN (CAST(CAST(1 AS BIGINT) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +-- !query 188 output +true + + +-- !query 189 +SELECT cast(1 as bigint) in (cast(1 as bigint), cast('1' as binary)) FROM t +-- !query 189 schema +struct<> +-- !query 189 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BIGINT) IN (CAST(1 AS BIGINT), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: bigint != binary; line 1 pos 25 + + +-- !query 190 +SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as boolean)) FROM t +-- !query 190 schema +struct<> +-- !query 190 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BIGINT) IN (CAST(1 AS BIGINT), CAST(1 AS 
BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: bigint != boolean; line 1 pos 25 + + +-- !query 191 +SELECT cast(1 as bigint) in (cast(1 as bigint), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 191 schema +struct<> +-- !query 191 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BIGINT) IN (CAST(1 AS BIGINT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: bigint != timestamp; line 1 pos 25 + + +-- !query 192 +SELECT cast(1 as bigint) in (cast(1 as bigint), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 192 schema +struct<> +-- !query 192 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BIGINT) IN (CAST(1 AS BIGINT), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: bigint != date; line 1 pos 25 + + +-- !query 193 +SELECT cast(1 as float) in (cast(1 as float), cast(1 as tinyint)) FROM t +-- !query 193 schema +struct<(CAST(CAST(1 AS FLOAT) AS FLOAT) IN (CAST(CAST(1 AS FLOAT) AS FLOAT), CAST(CAST(1 AS TINYINT) AS FLOAT))):boolean> +-- !query 193 output +true + + +-- !query 194 +SELECT cast(1 as float) in (cast(1 as float), cast(1 as smallint)) FROM t +-- !query 194 schema +struct<(CAST(CAST(1 AS FLOAT) AS FLOAT) IN (CAST(CAST(1 AS FLOAT) AS FLOAT), CAST(CAST(1 AS SMALLINT) AS FLOAT))):boolean> +-- !query 194 output +true + + +-- !query 195 +SELECT cast(1 as float) in (cast(1 as float), cast(1 as int)) FROM t +-- !query 195 schema +struct<(CAST(CAST(1 AS FLOAT) AS FLOAT) IN (CAST(CAST(1 AS FLOAT) AS FLOAT), CAST(CAST(1 AS INT) AS FLOAT))):boolean> +-- !query 195 output +true + + +-- !query 196 +SELECT cast(1 as float) in (cast(1 as float), cast(1 as bigint)) FROM t +-- !query 196 schema +struct<(CAST(CAST(1 AS FLOAT) AS FLOAT) IN (CAST(CAST(1 AS FLOAT) AS FLOAT), CAST(CAST(1 AS BIGINT) AS FLOAT))):boolean> +-- !query 196 output +true + + +-- !query 197 +SELECT cast(1 as float) in (cast(1 as float), cast(1 as float)) FROM t +-- !query 197 schema +struct<(CAST(1 AS FLOAT) IN (CAST(1 AS FLOAT), CAST(1 AS FLOAT))):boolean> +-- !query 197 output +true + + +-- !query 198 +SELECT cast(1 as float) in (cast(1 as float), cast(1 as double)) FROM t +-- !query 198 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) IN (CAST(CAST(1 AS FLOAT) AS DOUBLE), CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> +-- !query 198 output +true + + +-- !query 199 +SELECT cast(1 as float) in (cast(1 as float), cast(1 as decimal(10, 0))) FROM t +-- !query 199 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) IN (CAST(CAST(1 AS FLOAT) AS DOUBLE), CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> +-- !query 199 output +true + + +-- !query 200 +SELECT cast(1 as float) in (cast(1 as float), cast(1 as string)) FROM t +-- !query 200 schema +struct<(CAST(CAST(1 AS FLOAT) AS STRING) IN (CAST(CAST(1 AS FLOAT) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +-- !query 200 output +true + + +-- !query 201 +SELECT cast(1 as float) in (cast(1 as float), cast('1' as binary)) FROM t +-- !query 201 schema +struct<> +-- !query 201 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS FLOAT) IN (CAST(1 AS FLOAT), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: float != binary; line 1 pos 24 + + +-- !query 202 +SELECT cast(1 as float) in (cast(1 as float), cast(1 as boolean)) FROM t +-- !query 202 schema +struct<> +-- !query 202 output 
+org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS FLOAT) IN (CAST(1 AS FLOAT), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: float != boolean; line 1 pos 24 + + +-- !query 203 +SELECT cast(1 as float) in (cast(1 as float), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 203 schema +struct<> +-- !query 203 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS FLOAT) IN (CAST(1 AS FLOAT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: float != timestamp; line 1 pos 24 + + +-- !query 204 +SELECT cast(1 as float) in (cast(1 as float), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 204 schema +struct<> +-- !query 204 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS FLOAT) IN (CAST(1 AS FLOAT), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: float != date; line 1 pos 24 + + +-- !query 205 +SELECT cast(1 as double) in (cast(1 as double), cast(1 as tinyint)) FROM t +-- !query 205 schema +struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE), CAST(CAST(1 AS TINYINT) AS DOUBLE))):boolean> +-- !query 205 output +true + + +-- !query 206 +SELECT cast(1 as double) in (cast(1 as double), cast(1 as smallint)) FROM t +-- !query 206 schema +struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE), CAST(CAST(1 AS SMALLINT) AS DOUBLE))):boolean> +-- !query 206 output +true + + +-- !query 207 +SELECT cast(1 as double) in (cast(1 as double), cast(1 as int)) FROM t +-- !query 207 schema +struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE), CAST(CAST(1 AS INT) AS DOUBLE))):boolean> +-- !query 207 output +true + + +-- !query 208 +SELECT cast(1 as double) in (cast(1 as double), cast(1 as bigint)) FROM t +-- !query 208 schema +struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE), CAST(CAST(1 AS BIGINT) AS DOUBLE))):boolean> +-- !query 208 output +true + + +-- !query 209 +SELECT cast(1 as double) in (cast(1 as double), cast(1 as float)) FROM t +-- !query 209 schema +struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE), CAST(CAST(1 AS FLOAT) AS DOUBLE))):boolean> +-- !query 209 output +true + + +-- !query 210 +SELECT cast(1 as double) in (cast(1 as double), cast(1 as double)) FROM t +-- !query 210 schema +struct<(CAST(1 AS DOUBLE) IN (CAST(1 AS DOUBLE), CAST(1 AS DOUBLE))):boolean> +-- !query 210 output +true + + +-- !query 211 +SELECT cast(1 as double) in (cast(1 as double), cast(1 as decimal(10, 0))) FROM t +-- !query 211 schema +struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE), CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> +-- !query 211 output +true + + +-- !query 212 +SELECT cast(1 as double) in (cast(1 as double), cast(1 as string)) FROM t +-- !query 212 schema +struct<(CAST(CAST(1 AS DOUBLE) AS STRING) IN (CAST(CAST(1 AS DOUBLE) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +-- !query 212 output +true + + +-- !query 213 +SELECT cast(1 as double) in (cast(1 as double), cast('1' as binary)) FROM t +-- !query 213 schema +struct<> +-- !query 213 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DOUBLE) IN (CAST(1 AS DOUBLE), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: double != binary; line 1 pos 25 + + +-- !query 214 +SELECT 
cast(1 as double) in (cast(1 as double), cast(1 as boolean)) FROM t +-- !query 214 schema +struct<> +-- !query 214 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DOUBLE) IN (CAST(1 AS DOUBLE), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: double != boolean; line 1 pos 25 + + +-- !query 215 +SELECT cast(1 as double) in (cast(1 as double), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 215 schema +struct<> +-- !query 215 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DOUBLE) IN (CAST(1 AS DOUBLE), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: double != timestamp; line 1 pos 25 + + +-- !query 216 +SELECT cast(1 as double) in (cast(1 as double), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 216 schema +struct<> +-- !query 216 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DOUBLE) IN (CAST(1 AS DOUBLE), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: double != date; line 1 pos 25 + + +-- !query 217 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as tinyint)) FROM t +-- !query 217 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)), CAST(CAST(1 AS TINYINT) AS DECIMAL(10,0)))):boolean> +-- !query 217 output +true + + +-- !query 218 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as smallint)) FROM t +-- !query 218 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)), CAST(CAST(1 AS SMALLINT) AS DECIMAL(10,0)))):boolean> +-- !query 218 output +true + + +-- !query 219 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as int)) FROM t +-- !query 219 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)), CAST(CAST(1 AS INT) AS DECIMAL(10,0)))):boolean> +-- !query 219 output +true + + +-- !query 220 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as bigint)) FROM t +-- !query 220 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)), CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)))):boolean> +-- !query 220 output +true + + +-- !query 221 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as float)) FROM t +-- !query 221 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE), CAST(CAST(1 AS FLOAT) AS DOUBLE))):boolean> +-- !query 221 output +true + + +-- !query 222 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as double)) FROM t +-- !query 222 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE), CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> +-- !query 222 output +true + + +-- !query 223 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as decimal(10, 0))) FROM t +-- !query 223 schema +struct<(CAST(1 AS DECIMAL(10,0)) IN (CAST(1 AS DECIMAL(10,0)), CAST(1 AS DECIMAL(10,0)))):boolean> +-- !query 223 output +true + + +-- !query 224 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as string)) FROM t +-- !query 224 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS STRING) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS STRING), CAST(CAST(1 
AS STRING) AS STRING))):boolean> +-- !query 224 output +true + + +-- !query 225 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast('1' as binary)) FROM t +-- !query 225 schema +struct<> +-- !query 225 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) IN (CAST(1 AS DECIMAL(10,0)), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: decimal(10,0) != binary; line 1 pos 33 + + +-- !query 226 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as boolean)) FROM t +-- !query 226 schema +struct<> +-- !query 226 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) IN (CAST(1 AS DECIMAL(10,0)), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: decimal(10,0) != boolean; line 1 pos 33 + + +-- !query 227 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 227 schema +struct<> +-- !query 227 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) IN (CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: decimal(10,0) != timestamp; line 1 pos 33 + + +-- !query 228 +SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 228 schema +struct<> +-- !query 228 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS DECIMAL(10,0)) IN (CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: decimal(10,0) != date; line 1 pos 33 + + +-- !query 229 +SELECT cast(1 as string) in (cast(1 as string), cast(1 as tinyint)) FROM t +-- !query 229 schema +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS TINYINT) AS STRING))):boolean> +-- !query 229 output +true + + +-- !query 230 +SELECT cast(1 as string) in (cast(1 as string), cast(1 as smallint)) FROM t +-- !query 230 schema +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS SMALLINT) AS STRING))):boolean> +-- !query 230 output +true + + +-- !query 231 +SELECT cast(1 as string) in (cast(1 as string), cast(1 as int)) FROM t +-- !query 231 schema +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS INT) AS STRING))):boolean> +-- !query 231 output +true + + +-- !query 232 +SELECT cast(1 as string) in (cast(1 as string), cast(1 as bigint)) FROM t +-- !query 232 schema +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS BIGINT) AS STRING))):boolean> +-- !query 232 output +true + + +-- !query 233 +SELECT cast(1 as string) in (cast(1 as string), cast(1 as float)) FROM t +-- !query 233 schema +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS FLOAT) AS STRING))):boolean> +-- !query 233 output +true + + +-- !query 234 +SELECT cast(1 as string) in (cast(1 as string), cast(1 as double)) FROM t +-- !query 234 schema +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS DOUBLE) AS STRING))):boolean> +-- !query 234 output +true + + +-- !query 235 +SELECT cast(1 as string) in (cast(1 as string), cast(1 as decimal(10, 0))) FROM t +-- !query 235 schema +struct<(CAST(CAST(1 AS STRING) AS STRING) 
IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS DECIMAL(10,0)) AS STRING))):boolean> +-- !query 235 output +true + + +-- !query 236 +SELECT cast(1 as string) in (cast(1 as string), cast(1 as string)) FROM t +-- !query 236 schema +struct<(CAST(1 AS STRING) IN (CAST(1 AS STRING), CAST(1 AS STRING))):boolean> +-- !query 236 output +true + + +-- !query 237 +SELECT cast(1 as string) in (cast(1 as string), cast('1' as binary)) FROM t +-- !query 237 schema +struct<> +-- !query 237 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS STRING) IN (CAST(1 AS STRING), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: string != binary; line 1 pos 25 + + +-- !query 238 +SELECT cast(1 as string) in (cast(1 as string), cast(1 as boolean)) FROM t +-- !query 238 schema +struct<> +-- !query 238 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS STRING) IN (CAST(1 AS STRING), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: string != boolean; line 1 pos 25 + + +-- !query 239 +SELECT cast(1 as string) in (cast(1 as string), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 239 schema +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING))):boolean> +-- !query 239 output +true + + +-- !query 240 +SELECT cast(1 as string) in (cast(1 as string), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 240 schema +struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING))):boolean> +-- !query 240 output +true + + +-- !query 241 +SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as tinyint)) FROM t +-- !query 241 schema +struct<> +-- !query 241 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS TINYINT)))' due to data type mismatch: Arguments must be same type but were: binary != tinyint; line 1 pos 27 + + +-- !query 242 +SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as smallint)) FROM t +-- !query 242 schema +struct<> +-- !query 242 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS SMALLINT)))' due to data type mismatch: Arguments must be same type but were: binary != smallint; line 1 pos 27 + + +-- !query 243 +SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as int)) FROM t +-- !query 243 schema +struct<> +-- !query 243 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS INT)))' due to data type mismatch: Arguments must be same type but were: binary != int; line 1 pos 27 + + +-- !query 244 +SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as bigint)) FROM t +-- !query 244 schema +struct<> +-- !query 244 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS BIGINT)))' due to data type mismatch: Arguments must be same type but were: binary != bigint; line 1 pos 27 + + +-- !query 245 +SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as float)) FROM t +-- !query 245 schema +struct<> +-- !query 245 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS FLOAT)))' due to data type mismatch: Arguments must be same type but were: binary != float; 
line 1 pos 27 + + +-- !query 246 +SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as double)) FROM t +-- !query 246 schema +struct<> +-- !query 246 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS DOUBLE)))' due to data type mismatch: Arguments must be same type but were: binary != double; line 1 pos 27 + + +-- !query 247 +SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as decimal(10, 0))) FROM t +-- !query 247 schema +struct<> +-- !query 247 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS DECIMAL(10,0))))' due to data type mismatch: Arguments must be same type but were: binary != decimal(10,0); line 1 pos 27 + + +-- !query 248 +SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as string)) FROM t +-- !query 248 schema +struct<> +-- !query 248 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS STRING)))' due to data type mismatch: Arguments must be same type but were: binary != string; line 1 pos 27 + + +-- !query 249 +SELECT cast('1' as binary) in (cast('1' as binary), cast('1' as binary)) FROM t +-- !query 249 schema +struct<(CAST(1 AS BINARY) IN (CAST(1 AS BINARY), CAST(1 AS BINARY))):boolean> +-- !query 249 output +true + + +-- !query 250 +SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as boolean)) FROM t +-- !query 250 schema +struct<> +-- !query 250 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: binary != boolean; line 1 pos 27 + + +-- !query 251 +SELECT cast('1' as binary) in (cast('1' as binary), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 251 schema +struct<> +-- !query 251 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: binary != timestamp; line 1 pos 27 + + +-- !query 252 +SELECT cast('1' as binary) in (cast('1' as binary), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 252 schema +struct<> +-- !query 252 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: binary != date; line 1 pos 27 + + +-- !query 253 +SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as tinyint)) FROM t +-- !query 253 schema +struct<> +-- !query 253 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST(1 AS TINYINT)))' due to data type mismatch: Arguments must be same type but were: boolean != tinyint; line 1 pos 28 + + +-- !query 254 +SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as smallint)) FROM t +-- !query 254 schema +struct<> +-- !query 254 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST(1 AS SMALLINT)))' due to data type mismatch: Arguments must be same type but were: boolean != smallint; line 1 pos 28 + + +-- !query 255 +SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as int)) FROM t +-- !query 255 schema +struct<> +-- !query 255 output +org.apache.spark.sql.AnalysisException 
+cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST(1 AS INT)))' due to data type mismatch: Arguments must be same type but were: boolean != int; line 1 pos 28 + + +-- !query 256 +SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as bigint)) FROM t +-- !query 256 schema +struct<> +-- !query 256 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST(1 AS BIGINT)))' due to data type mismatch: Arguments must be same type but were: boolean != bigint; line 1 pos 28 + + +-- !query 257 +SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as float)) FROM t +-- !query 257 schema +struct<> +-- !query 257 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST(1 AS FLOAT)))' due to data type mismatch: Arguments must be same type but were: boolean != float; line 1 pos 28 + + +-- !query 258 +SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as double)) FROM t +-- !query 258 schema +struct<> +-- !query 258 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST(1 AS DOUBLE)))' due to data type mismatch: Arguments must be same type but were: boolean != double; line 1 pos 28 + + +-- !query 259 +SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as decimal(10, 0))) FROM t +-- !query 259 schema +struct<> +-- !query 259 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST(1 AS DECIMAL(10,0))))' due to data type mismatch: Arguments must be same type but were: boolean != decimal(10,0); line 1 pos 28 + + +-- !query 260 +SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as string)) FROM t +-- !query 260 schema +struct<> +-- !query 260 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST(1 AS STRING)))' due to data type mismatch: Arguments must be same type but were: boolean != string; line 1 pos 28 + + +-- !query 261 +SELECT cast('1' as boolean) in (cast('1' as boolean), cast('1' as binary)) FROM t +-- !query 261 schema +struct<> +-- !query 261 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: boolean != binary; line 1 pos 28 + + +-- !query 262 +SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as boolean)) FROM t +-- !query 262 schema +struct<(CAST(1 AS BOOLEAN) IN (CAST(1 AS BOOLEAN), CAST(1 AS BOOLEAN))):boolean> +-- !query 262 output +true + + +-- !query 263 +SELECT cast('1' as boolean) in (cast('1' as boolean), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 263 schema +struct<> +-- !query 263 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: boolean != timestamp; line 1 pos 28 + + +-- !query 264 +SELECT cast('1' as boolean) in (cast('1' as boolean), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 264 schema +struct<> +-- !query 264 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: boolean != date; line 1 pos 28 + + +-- 
!query 265 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as tinyint)) FROM t +-- !query 265 schema +struct<> +-- !query 265 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(1 AS TINYINT)))' due to data type mismatch: Arguments must be same type but were: timestamp != tinyint; line 1 pos 50 + + +-- !query 266 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as smallint)) FROM t +-- !query 266 schema +struct<> +-- !query 266 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(1 AS SMALLINT)))' due to data type mismatch: Arguments must be same type but were: timestamp != smallint; line 1 pos 50 + + +-- !query 267 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as int)) FROM t +-- !query 267 schema +struct<> +-- !query 267 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(1 AS INT)))' due to data type mismatch: Arguments must be same type but were: timestamp != int; line 1 pos 50 + + +-- !query 268 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as bigint)) FROM t +-- !query 268 schema +struct<> +-- !query 268 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(1 AS BIGINT)))' due to data type mismatch: Arguments must be same type but were: timestamp != bigint; line 1 pos 50 + + +-- !query 269 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as float)) FROM t +-- !query 269 schema +struct<> +-- !query 269 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(1 AS FLOAT)))' due to data type mismatch: Arguments must be same type but were: timestamp != float; line 1 pos 50 + + +-- !query 270 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as double)) FROM t +-- !query 270 schema +struct<> +-- !query 270 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(1 AS DOUBLE)))' due to data type mismatch: Arguments must be same type but were: timestamp != double; line 1 pos 50 + + +-- !query 271 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as decimal(10, 0))) FROM t +-- !query 271 schema +struct<> +-- !query 271 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(10,0))))' due to data type mismatch: Arguments must be same type but were: timestamp != decimal(10,0); line 1 pos 50 + + +-- !query 272 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as string)) FROM t +-- !query 272 schema +struct<(CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS STRING) IN (CAST(CAST(2017-12-12 09:30:00.0 
AS TIMESTAMP) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +-- !query 272 output +true + + +-- !query 273 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast('1' as binary)) FROM t +-- !query 273 schema +struct<> +-- !query 273 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: timestamp != binary; line 1 pos 50 + + +-- !query 274 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as boolean)) FROM t +-- !query 274 schema +struct<> +-- !query 274 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: timestamp != boolean; line 1 pos 50 + + +-- !query 275 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 275 schema +struct<(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) IN (CAST(2017-12-12 09:30:00.0 AS TIMESTAMP), CAST(2017-12-11 09:30:00.0 AS TIMESTAMP))):boolean> +-- !query 275 output +true + + +-- !query 276 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 276 schema +struct<(CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS TIMESTAMP) IN (CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS TIMESTAMP), CAST(CAST(2017-12-11 09:30:00 AS DATE) AS TIMESTAMP))):boolean> +-- !query 276 output +true + + +-- !query 277 +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as tinyint)) FROM t +-- !query 277 schema +struct<> +-- !query 277 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2017-12-12 09:30:00' AS DATE), CAST(1 AS TINYINT)))' due to data type mismatch: Arguments must be same type but were: date != tinyint; line 1 pos 43 + + +-- !query 278 +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as smallint)) FROM t +-- !query 278 schema +struct<> +-- !query 278 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2017-12-12 09:30:00' AS DATE), CAST(1 AS SMALLINT)))' due to data type mismatch: Arguments must be same type but were: date != smallint; line 1 pos 43 + + +-- !query 279 +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as int)) FROM t +-- !query 279 schema +struct<> +-- !query 279 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2017-12-12 09:30:00' AS DATE), CAST(1 AS INT)))' due to data type mismatch: Arguments must be same type but were: date != int; line 1 pos 43 + + +-- !query 280 +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as bigint)) FROM t +-- !query 280 schema +struct<> +-- !query 280 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2017-12-12 09:30:00' AS DATE), CAST(1 AS BIGINT)))' due to data type mismatch: Arguments must be same type but were: date != bigint; line 1 
pos 43 + + +-- !query 281 +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as float)) FROM t +-- !query 281 schema +struct<> +-- !query 281 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2017-12-12 09:30:00' AS DATE), CAST(1 AS FLOAT)))' due to data type mismatch: Arguments must be same type but were: date != float; line 1 pos 43 + + +-- !query 282 +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as double)) FROM t +-- !query 282 schema +struct<> +-- !query 282 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2017-12-12 09:30:00' AS DATE), CAST(1 AS DOUBLE)))' due to data type mismatch: Arguments must be same type but were: date != double; line 1 pos 43 + + +-- !query 283 +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as decimal(10, 0))) FROM t +-- !query 283 schema +struct<> +-- !query 283 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2017-12-12 09:30:00' AS DATE), CAST(1 AS DECIMAL(10,0))))' due to data type mismatch: Arguments must be same type but were: date != decimal(10,0); line 1 pos 43 + + +-- !query 284 +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as string)) FROM t +-- !query 284 schema +struct<(CAST(CAST(2017-12-12 09:30:00 AS DATE) AS STRING) IN (CAST(CAST(2017-12-12 09:30:00 AS DATE) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> +-- !query 284 output +true + + +-- !query 285 +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast('1' as binary)) FROM t +-- !query 285 schema +struct<> +-- !query 285 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2017-12-12 09:30:00' AS DATE), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: date != binary; line 1 pos 43 + + +-- !query 286 +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as boolean)) FROM t +-- !query 286 schema +struct<> +-- !query 286 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2017-12-12 09:30:00' AS DATE), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: date != boolean; line 1 pos 43 + + +-- !query 287 +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 287 schema +struct<(CAST(CAST(2017-12-12 09:30:00 AS DATE) AS TIMESTAMP) IN (CAST(CAST(2017-12-12 09:30:00 AS DATE) AS TIMESTAMP), CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS TIMESTAMP))):boolean> +-- !query 287 output +true + + +-- !query 288 +SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 288 schema +struct<(CAST(2017-12-12 09:30:00 AS DATE) IN (CAST(2017-12-12 09:30:00 AS DATE), CAST(2017-12-11 09:30:00 AS DATE))):boolean> +-- !query 288 output +true diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/promoteStrings.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/promoteStrings.sql.out new file mode 100644 index 0000000000000..0beb1f6263d2c --- /dev/null +++ 
b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/promoteStrings.sql.out @@ -0,0 +1,2578 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 316 + + +-- !query 0 +CREATE TEMPORARY VIEW t AS SELECT 1 +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +SELECT '1' + cast(1 as tinyint) FROM t +-- !query 1 schema +struct<(CAST(1 AS DOUBLE) + CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> +-- !query 1 output +2.0 + + +-- !query 2 +SELECT '1' + cast(1 as smallint) FROM t +-- !query 2 schema +struct<(CAST(1 AS DOUBLE) + CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> +-- !query 2 output +2.0 + + +-- !query 3 +SELECT '1' + cast(1 as int) FROM t +-- !query 3 schema +struct<(CAST(1 AS DOUBLE) + CAST(CAST(1 AS INT) AS DOUBLE)):double> +-- !query 3 output +2.0 + + +-- !query 4 +SELECT '1' + cast(1 as bigint) FROM t +-- !query 4 schema +struct<(CAST(1 AS DOUBLE) + CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> +-- !query 4 output +2.0 + + +-- !query 5 +SELECT '1' + cast(1 as float) FROM t +-- !query 5 schema +struct<(CAST(1 AS DOUBLE) + CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 5 output +2.0 + + +-- !query 6 +SELECT '1' + cast(1 as double) FROM t +-- !query 6 schema +struct<(CAST(1 AS DOUBLE) + CAST(1 AS DOUBLE)):double> +-- !query 6 output +2.0 + + +-- !query 7 +SELECT '1' + cast(1 as decimal(10, 0)) FROM t +-- !query 7 schema +struct<(CAST(1 AS DOUBLE) + CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> +-- !query 7 output +2.0 + + +-- !query 8 +SELECT '1' + '1' FROM t +-- !query 8 schema +struct<(CAST(1 AS DOUBLE) + CAST(1 AS DOUBLE)):double> +-- !query 8 output +2.0 + + +-- !query 9 +SELECT '1' + cast('1' as binary) FROM t +-- !query 9 schema +struct<> +-- !query 9 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS DOUBLE) + CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) + CAST('1' AS BINARY))' (double and binary).; line 1 pos 7 + + +-- !query 10 +SELECT '1' + cast(1 as boolean) FROM t +-- !query 10 schema +struct<> +-- !query 10 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS DOUBLE) + CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) + CAST(1 AS BOOLEAN))' (double and boolean).; line 1 pos 7 + + +-- !query 11 +SELECT '1' + cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 11 schema +struct<> +-- !query 11 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS DOUBLE) + CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) + CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (double and timestamp).; line 1 pos 7 + + +-- !query 12 +SELECT '1' + cast('2017-12-11 09:30:00' as date) FROM t +-- !query 12 schema +struct<> +-- !query 12 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS DOUBLE) + CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) + CAST('2017-12-11 09:30:00' AS DATE))' (double and date).; line 1 pos 7 + + +-- !query 13 +SELECT '1' - cast(1 as tinyint) FROM t +-- !query 13 schema +struct<(CAST(1 AS DOUBLE) - CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> +-- !query 13 output +0.0 + + +-- !query 14 +SELECT '1' - cast(1 as smallint) FROM t +-- !query 14 schema +struct<(CAST(1 AS DOUBLE) - CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> +-- !query 14 output +0.0 + + +-- !query 15 +SELECT '1' - cast(1 as int) FROM t +-- !query 15 schema 
+struct<(CAST(1 AS DOUBLE) - CAST(CAST(1 AS INT) AS DOUBLE)):double> +-- !query 15 output +0.0 + + +-- !query 16 +SELECT '1' - cast(1 as bigint) FROM t +-- !query 16 schema +struct<(CAST(1 AS DOUBLE) - CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> +-- !query 16 output +0.0 + + +-- !query 17 +SELECT '1' - cast(1 as float) FROM t +-- !query 17 schema +struct<(CAST(1 AS DOUBLE) - CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 17 output +0.0 + + +-- !query 18 +SELECT '1' - cast(1 as double) FROM t +-- !query 18 schema +struct<(CAST(1 AS DOUBLE) - CAST(1 AS DOUBLE)):double> +-- !query 18 output +0.0 + + +-- !query 19 +SELECT '1' - cast(1 as decimal(10, 0)) FROM t +-- !query 19 schema +struct<(CAST(1 AS DOUBLE) - CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> +-- !query 19 output +0.0 + + +-- !query 20 +SELECT '1' - '1' FROM t +-- !query 20 schema +struct<(CAST(1 AS DOUBLE) - CAST(1 AS DOUBLE)):double> +-- !query 20 output +0.0 + + +-- !query 21 +SELECT '1' - cast('1' as binary) FROM t +-- !query 21 schema +struct<> +-- !query 21 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS DOUBLE) - CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) - CAST('1' AS BINARY))' (double and binary).; line 1 pos 7 + + +-- !query 22 +SELECT '1' - cast(1 as boolean) FROM t +-- !query 22 schema +struct<> +-- !query 22 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS DOUBLE) - CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) - CAST(1 AS BOOLEAN))' (double and boolean).; line 1 pos 7 + + +-- !query 23 +SELECT '1' - cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 23 schema +struct<> +-- !query 23 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS DOUBLE) - CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) - CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (double and timestamp).; line 1 pos 7 + + +-- !query 24 +SELECT '1' - cast('2017-12-11 09:30:00' as date) FROM t +-- !query 24 schema +struct<> +-- !query 24 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS DOUBLE) - CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) - CAST('2017-12-11 09:30:00' AS DATE))' (double and date).; line 1 pos 7 + + +-- !query 25 +SELECT '1' * cast(1 as tinyint) FROM t +-- !query 25 schema +struct<(CAST(1 AS DOUBLE) * CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> +-- !query 25 output +1.0 + + +-- !query 26 +SELECT '1' * cast(1 as smallint) FROM t +-- !query 26 schema +struct<(CAST(1 AS DOUBLE) * CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> +-- !query 26 output +1.0 + + +-- !query 27 +SELECT '1' * cast(1 as int) FROM t +-- !query 27 schema +struct<(CAST(1 AS DOUBLE) * CAST(CAST(1 AS INT) AS DOUBLE)):double> +-- !query 27 output +1.0 + + +-- !query 28 +SELECT '1' * cast(1 as bigint) FROM t +-- !query 28 schema +struct<(CAST(1 AS DOUBLE) * CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> +-- !query 28 output +1.0 + + +-- !query 29 +SELECT '1' * cast(1 as float) FROM t +-- !query 29 schema +struct<(CAST(1 AS DOUBLE) * CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 29 output +1.0 + + +-- !query 30 +SELECT '1' * cast(1 as double) FROM t +-- !query 30 schema +struct<(CAST(1 AS DOUBLE) * CAST(1 AS DOUBLE)):double> +-- !query 30 output +1.0 + + +-- !query 31 +SELECT '1' * cast(1 as decimal(10, 0)) FROM t +-- !query 31 schema 
+struct<(CAST(1 AS DOUBLE) * CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> +-- !query 31 output +1.0 + + +-- !query 32 +SELECT '1' * '1' FROM t +-- !query 32 schema +struct<(CAST(1 AS DOUBLE) * CAST(1 AS DOUBLE)):double> +-- !query 32 output +1.0 + + +-- !query 33 +SELECT '1' * cast('1' as binary) FROM t +-- !query 33 schema +struct<> +-- !query 33 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS DOUBLE) * CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) * CAST('1' AS BINARY))' (double and binary).; line 1 pos 7 + + +-- !query 34 +SELECT '1' * cast(1 as boolean) FROM t +-- !query 34 schema +struct<> +-- !query 34 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS DOUBLE) * CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) * CAST(1 AS BOOLEAN))' (double and boolean).; line 1 pos 7 + + +-- !query 35 +SELECT '1' * cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 35 schema +struct<> +-- !query 35 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS DOUBLE) * CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) * CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (double and timestamp).; line 1 pos 7 + + +-- !query 36 +SELECT '1' * cast('2017-12-11 09:30:00' as date) FROM t +-- !query 36 schema +struct<> +-- !query 36 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS DOUBLE) * CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) * CAST('2017-12-11 09:30:00' AS DATE))' (double and date).; line 1 pos 7 + + +-- !query 37 +SELECT '1' / cast(1 as tinyint) FROM t +-- !query 37 schema +struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> +-- !query 37 output +1.0 + + +-- !query 38 +SELECT '1' / cast(1 as smallint) FROM t +-- !query 38 schema +struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> +-- !query 38 output +1.0 + + +-- !query 39 +SELECT '1' / cast(1 as int) FROM t +-- !query 39 schema +struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS INT) AS DOUBLE)):double> +-- !query 39 output +1.0 + + +-- !query 40 +SELECT '1' / cast(1 as bigint) FROM t +-- !query 40 schema +struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> +-- !query 40 output +1.0 + + +-- !query 41 +SELECT '1' / cast(1 as float) FROM t +-- !query 41 schema +struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 41 output +1.0 + + +-- !query 42 +SELECT '1' / cast(1 as double) FROM t +-- !query 42 schema +struct<(CAST(1 AS DOUBLE) / CAST(1 AS DOUBLE)):double> +-- !query 42 output +1.0 + + +-- !query 43 +SELECT '1' / cast(1 as decimal(10, 0)) FROM t +-- !query 43 schema +struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> +-- !query 43 output +1.0 + + +-- !query 44 +SELECT '1' / '1' FROM t +-- !query 44 schema +struct<(CAST(1 AS DOUBLE) / CAST(1 AS DOUBLE)):double> +-- !query 44 output +1.0 + + +-- !query 45 +SELECT '1' / cast('1' as binary) FROM t +-- !query 45 schema +struct<> +-- !query 45 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS DOUBLE) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) / CAST('1' AS BINARY))' (double and binary).; line 1 pos 7 + + +-- !query 46 +SELECT '1' / cast(1 as boolean) FROM t +-- !query 46 schema +struct<> +-- !query 46 output 
+org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS DOUBLE) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) / CAST(1 AS BOOLEAN))' (double and boolean).; line 1 pos 7 + + +-- !query 47 +SELECT '1' / cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 47 schema +struct<> +-- !query 47 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS DOUBLE) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (double and timestamp).; line 1 pos 7 + + +-- !query 48 +SELECT '1' / cast('2017-12-11 09:30:00' as date) FROM t +-- !query 48 schema +struct<> +-- !query 48 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS DOUBLE) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) / CAST('2017-12-11 09:30:00' AS DATE))' (double and date).; line 1 pos 7 + + +-- !query 49 +SELECT '1' % cast(1 as tinyint) FROM t +-- !query 49 schema +struct<(CAST(1 AS DOUBLE) % CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> +-- !query 49 output +0.0 + + +-- !query 50 +SELECT '1' % cast(1 as smallint) FROM t +-- !query 50 schema +struct<(CAST(1 AS DOUBLE) % CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> +-- !query 50 output +0.0 + + +-- !query 51 +SELECT '1' % cast(1 as int) FROM t +-- !query 51 schema +struct<(CAST(1 AS DOUBLE) % CAST(CAST(1 AS INT) AS DOUBLE)):double> +-- !query 51 output +0.0 + + +-- !query 52 +SELECT '1' % cast(1 as bigint) FROM t +-- !query 52 schema +struct<(CAST(1 AS DOUBLE) % CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> +-- !query 52 output +0.0 + + +-- !query 53 +SELECT '1' % cast(1 as float) FROM t +-- !query 53 schema +struct<(CAST(1 AS DOUBLE) % CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 53 output +0.0 + + +-- !query 54 +SELECT '1' % cast(1 as double) FROM t +-- !query 54 schema +struct<(CAST(1 AS DOUBLE) % CAST(1 AS DOUBLE)):double> +-- !query 54 output +0.0 + + +-- !query 55 +SELECT '1' % cast(1 as decimal(10, 0)) FROM t +-- !query 55 schema +struct<(CAST(1 AS DOUBLE) % CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> +-- !query 55 output +0.0 + + +-- !query 56 +SELECT '1' % '1' FROM t +-- !query 56 schema +struct<(CAST(1 AS DOUBLE) % CAST(1 AS DOUBLE)):double> +-- !query 56 output +0.0 + + +-- !query 57 +SELECT '1' % cast('1' as binary) FROM t +-- !query 57 schema +struct<> +-- !query 57 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS DOUBLE) % CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) % CAST('1' AS BINARY))' (double and binary).; line 1 pos 7 + + +-- !query 58 +SELECT '1' % cast(1 as boolean) FROM t +-- !query 58 schema +struct<> +-- !query 58 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS DOUBLE) % CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) % CAST(1 AS BOOLEAN))' (double and boolean).; line 1 pos 7 + + +-- !query 59 +SELECT '1' % cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 59 schema +struct<> +-- !query 59 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS DOUBLE) % CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) % CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (double and timestamp).; line 1 pos 7 + + +-- !query 60 +SELECT '1' % cast('2017-12-11 09:30:00' as 
date) FROM t +-- !query 60 schema +struct<> +-- !query 60 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS DOUBLE) % CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) % CAST('2017-12-11 09:30:00' AS DATE))' (double and date).; line 1 pos 7 + + +-- !query 61 +SELECT pmod('1', cast(1 as tinyint)) FROM t +-- !query 61 schema +struct<pmod(CAST(1 AS DOUBLE), CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> +-- !query 61 output +0.0 + + +-- !query 62 +SELECT pmod('1', cast(1 as smallint)) FROM t +-- !query 62 schema +struct<pmod(CAST(1 AS DOUBLE), CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> +-- !query 62 output +0.0 + + +-- !query 63 +SELECT pmod('1', cast(1 as int)) FROM t +-- !query 63 schema +struct<pmod(CAST(1 AS DOUBLE), CAST(CAST(1 AS INT) AS DOUBLE)):double> +-- !query 63 output +0.0 + + +-- !query 64 +SELECT pmod('1', cast(1 as bigint)) FROM t +-- !query 64 schema +struct<pmod(CAST(1 AS DOUBLE), CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> +-- !query 64 output +0.0 + + +-- !query 65 +SELECT pmod('1', cast(1 as float)) FROM t +-- !query 65 schema +struct<pmod(CAST(1 AS DOUBLE), CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> +-- !query 65 output +0.0 + + +-- !query 66 +SELECT pmod('1', cast(1 as double)) FROM t +-- !query 66 schema +struct<pmod(CAST(1 AS DOUBLE), CAST(1 AS DOUBLE)):double> +-- !query 66 output +0.0 + + +-- !query 67 +SELECT pmod('1', cast(1 as decimal(10, 0))) FROM t +-- !query 67 schema +struct<pmod(CAST(1 AS DOUBLE), CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> +-- !query 67 output +0.0 + + +-- !query 68 +SELECT pmod('1', '1') FROM t +-- !query 68 schema +struct<pmod(CAST(1 AS DOUBLE), CAST(1 AS DOUBLE)):double> +-- !query 68 output +0.0 + + +-- !query 69 +SELECT pmod('1', cast('1' as binary)) FROM t +-- !query 69 schema +struct<> +-- !query 69 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST('1' AS DOUBLE), CAST('1' AS BINARY))' due to data type mismatch: differing types in 'pmod(CAST('1' AS DOUBLE), CAST('1' AS BINARY))' (double and binary).; line 1 pos 7 + + +-- !query 70 +SELECT pmod('1', cast(1 as boolean)) FROM t +-- !query 70 schema +struct<> +-- !query 70 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST('1' AS DOUBLE), CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in 'pmod(CAST('1' AS DOUBLE), CAST(1 AS BOOLEAN))' (double and boolean).; line 1 pos 7 + + +-- !query 71 +SELECT pmod('1', cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 71 schema +struct<> +-- !query 71 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST('1' AS DOUBLE), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in 'pmod(CAST('1' AS DOUBLE), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (double and timestamp).; line 1 pos 7 + + +-- !query 72 +SELECT pmod('1', cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 72 schema +struct<> +-- !query 72 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST('1' AS DOUBLE), CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in 'pmod(CAST('1' AS DOUBLE), CAST('2017-12-11 09:30:00' AS DATE))' (double and date).; line 1 pos 7 + + +-- !query 73 +SELECT cast(1 as tinyint) + '1' FROM t +-- !query 73 schema +struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) + CAST(1 AS DOUBLE)):double> +-- !query 73 output +2.0 + + +-- !query 74 +SELECT cast(1 as smallint) + '1' FROM t +-- !query 74 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) + CAST(1 AS DOUBLE)):double> +-- !query 74 output +2.0 + + +-- !query 75 +SELECT cast(1 as int) + '1' FROM t +-- !query 75 schema +struct<(CAST(CAST(1 AS INT) AS DOUBLE) + CAST(1 AS DOUBLE)):double> +-- !query 75 output +2.0 + + +-- !query 76 +SELECT cast(1 as bigint) + '1' FROM t +-- !query 76 schema +struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) + CAST(1 AS DOUBLE)):double> +-- !query 76 output +2.0 + + +-- !query 77 +SELECT cast(1 as float) + '1' FROM t +-- !query 77 schema
+struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) + CAST(1 AS DOUBLE)):double> +-- !query 77 output +2.0 + + +-- !query 78 +SELECT cast(1 as double) + '1' FROM t +-- !query 78 schema +struct<(CAST(1 AS DOUBLE) + CAST(1 AS DOUBLE)):double> +-- !query 78 output +2.0 + + +-- !query 79 +SELECT cast(1 as decimal(10, 0)) + '1' FROM t +-- !query 79 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) + CAST(1 AS DOUBLE)):double> +-- !query 79 output +2.0 + + +-- !query 80 +SELECT cast('1' as binary) + '1' FROM t +-- !query 80 schema +struct<> +-- !query 80 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) + CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) + CAST('1' AS DOUBLE))' (binary and double).; line 1 pos 7 + + +-- !query 81 +SELECT cast(1 as boolean) + '1' FROM t +-- !query 81 schema +struct<> +-- !query 81 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BOOLEAN) + CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) + CAST('1' AS DOUBLE))' (boolean and double).; line 1 pos 7 + + +-- !query 82 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) + '1' FROM t +-- !query 82 schema +struct<> +-- !query 82 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) + CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) + CAST('1' AS DOUBLE))' (timestamp and double).; line 1 pos 7 + + +-- !query 83 +SELECT cast('2017-12-11 09:30:00' as date) + '1' FROM t +-- !query 83 schema +struct<> +-- !query 83 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) + CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) + CAST('1' AS DOUBLE))' (date and double).; line 1 pos 7 + + +-- !query 84 +SELECT cast(1 as tinyint) - '1' FROM t +-- !query 84 schema +struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) - CAST(1 AS DOUBLE)):double> +-- !query 84 output +0.0 + + +-- !query 85 +SELECT cast(1 as smallint) - '1' FROM t +-- !query 85 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) - CAST(1 AS DOUBLE)):double> +-- !query 85 output +0.0 + + +-- !query 86 +SELECT cast(1 as int) - '1' FROM t +-- !query 86 schema +struct<(CAST(CAST(1 AS INT) AS DOUBLE) - CAST(1 AS DOUBLE)):double> +-- !query 86 output +0.0 + + +-- !query 87 +SELECT cast(1 as bigint) - '1' FROM t +-- !query 87 schema +struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) - CAST(1 AS DOUBLE)):double> +-- !query 87 output +0.0 + + +-- !query 88 +SELECT cast(1 as float) - '1' FROM t +-- !query 88 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) - CAST(1 AS DOUBLE)):double> +-- !query 88 output +0.0 + + +-- !query 89 +SELECT cast(1 as double) - '1' FROM t +-- !query 89 schema +struct<(CAST(1 AS DOUBLE) - CAST(1 AS DOUBLE)):double> +-- !query 89 output +0.0 + + +-- !query 90 +SELECT cast(1 as decimal(10, 0)) - '1' FROM t +-- !query 90 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) - CAST(1 AS DOUBLE)):double> +-- !query 90 output +0.0 + + +-- !query 91 +SELECT cast('1' as binary) - '1' FROM t +-- !query 91 schema +struct<> +-- !query 91 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) - CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) - CAST('1' AS DOUBLE))' (binary and double).; line 1 pos 7 + + +-- !query 92 +SELECT cast(1 as boolean) - '1' FROM t +-- 
!query 92 schema +struct<> +-- !query 92 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BOOLEAN) - CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) - CAST('1' AS DOUBLE))' (boolean and double).; line 1 pos 7 + + +-- !query 93 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) - '1' FROM t +-- !query 93 schema +struct<> +-- !query 93 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) - CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) - CAST('1' AS DOUBLE))' (timestamp and double).; line 1 pos 7 + + +-- !query 94 +SELECT cast('2017-12-11 09:30:00' as date) - '1' FROM t +-- !query 94 schema +struct<> +-- !query 94 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) - CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) - CAST('1' AS DOUBLE))' (date and double).; line 1 pos 7 + + +-- !query 95 +SELECT cast(1 as tinyint) * '1' FROM t +-- !query 95 schema +struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) * CAST(1 AS DOUBLE)):double> +-- !query 95 output +1.0 + + +-- !query 96 +SELECT cast(1 as smallint) * '1' FROM t +-- !query 96 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) * CAST(1 AS DOUBLE)):double> +-- !query 96 output +1.0 + + +-- !query 97 +SELECT cast(1 as int) * '1' FROM t +-- !query 97 schema +struct<(CAST(CAST(1 AS INT) AS DOUBLE) * CAST(1 AS DOUBLE)):double> +-- !query 97 output +1.0 + + +-- !query 98 +SELECT cast(1 as bigint) * '1' FROM t +-- !query 98 schema +struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) * CAST(1 AS DOUBLE)):double> +-- !query 98 output +1.0 + + +-- !query 99 +SELECT cast(1 as float) * '1' FROM t +-- !query 99 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) * CAST(1 AS DOUBLE)):double> +-- !query 99 output +1.0 + + +-- !query 100 +SELECT cast(1 as double) * '1' FROM t +-- !query 100 schema +struct<(CAST(1 AS DOUBLE) * CAST(1 AS DOUBLE)):double> +-- !query 100 output +1.0 + + +-- !query 101 +SELECT cast(1 as decimal(10, 0)) * '1' FROM t +-- !query 101 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) * CAST(1 AS DOUBLE)):double> +-- !query 101 output +1.0 + + +-- !query 102 +SELECT cast('1' as binary) * '1' FROM t +-- !query 102 schema +struct<> +-- !query 102 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) * CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) * CAST('1' AS DOUBLE))' (binary and double).; line 1 pos 7 + + +-- !query 103 +SELECT cast(1 as boolean) * '1' FROM t +-- !query 103 schema +struct<> +-- !query 103 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BOOLEAN) * CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) * CAST('1' AS DOUBLE))' (boolean and double).; line 1 pos 7 + + +-- !query 104 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) * '1' FROM t +-- !query 104 schema +struct<> +-- !query 104 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) * CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) * CAST('1' AS DOUBLE))' (timestamp and double).; line 1 pos 7 + + +-- !query 105 +SELECT cast('2017-12-11 09:30:00' as date) * '1' FROM t +-- !query 105 schema +struct<> +-- !query 105 output 
+org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) * CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) * CAST('1' AS DOUBLE))' (date and double).; line 1 pos 7 + + +-- !query 106 +SELECT cast(1 as tinyint) / '1' FROM t +-- !query 106 schema +struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) / CAST(CAST(1 AS DOUBLE) AS DOUBLE)):double> +-- !query 106 output +1.0 + + +-- !query 107 +SELECT cast(1 as smallint) / '1' FROM t +-- !query 107 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) / CAST(CAST(1 AS DOUBLE) AS DOUBLE)):double> +-- !query 107 output +1.0 + + +-- !query 108 +SELECT cast(1 as int) / '1' FROM t +-- !query 108 schema +struct<(CAST(CAST(1 AS INT) AS DOUBLE) / CAST(CAST(1 AS DOUBLE) AS DOUBLE)):double> +-- !query 108 output +1.0 + + +-- !query 109 +SELECT cast(1 as bigint) / '1' FROM t +-- !query 109 schema +struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) / CAST(CAST(1 AS DOUBLE) AS DOUBLE)):double> +-- !query 109 output +1.0 + + +-- !query 110 +SELECT cast(1 as float) / '1' FROM t +-- !query 110 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(1 AS DOUBLE) AS DOUBLE)):double> +-- !query 110 output +1.0 + + +-- !query 111 +SELECT cast(1 as double) / '1' FROM t +-- !query 111 schema +struct<(CAST(1 AS DOUBLE) / CAST(1 AS DOUBLE)):double> +-- !query 111 output +1.0 + + +-- !query 112 +SELECT cast(1 as decimal(10, 0)) / '1' FROM t +-- !query 112 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) / CAST(1 AS DOUBLE)):double> +-- !query 112 output +1.0 + + +-- !query 113 +SELECT cast('1' as binary) / '1' FROM t +-- !query 113 schema +struct<> +-- !query 113 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) / CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST('1' AS DOUBLE))' (binary and double).; line 1 pos 7 + + +-- !query 114 +SELECT cast(1 as boolean) / '1' FROM t +-- !query 114 schema +struct<> +-- !query 114 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BOOLEAN) / CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST('1' AS DOUBLE))' (boolean and double).; line 1 pos 7 + + +-- !query 115 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) / '1' FROM t +-- !query 115 schema +struct<> +-- !query 115 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST('1' AS DOUBLE))' (timestamp and double).; line 1 pos 7 + + +-- !query 116 +SELECT cast('2017-12-11 09:30:00' as date) / '1' FROM t +-- !query 116 schema +struct<> +-- !query 116 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST('1' AS DOUBLE))' (date and double).; line 1 pos 7 + + +-- !query 117 +SELECT cast(1 as tinyint) % '1' FROM t +-- !query 117 schema +struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) % CAST(1 AS DOUBLE)):double> +-- !query 117 output +0.0 + + +-- !query 118 +SELECT cast(1 as smallint) % '1' FROM t +-- !query 118 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) % CAST(1 AS DOUBLE)):double> +-- !query 118 output +0.0 + + +-- !query 119 +SELECT cast(1 as int) % '1' FROM t +-- !query 119 schema +struct<(CAST(CAST(1 AS INT) AS 
DOUBLE) % CAST(1 AS DOUBLE)):double> +-- !query 119 output +0.0 + + +-- !query 120 +SELECT cast(1 as bigint) % '1' FROM t +-- !query 120 schema +struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) % CAST(1 AS DOUBLE)):double> +-- !query 120 output +0.0 + + +-- !query 121 +SELECT cast(1 as float) % '1' FROM t +-- !query 121 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) % CAST(1 AS DOUBLE)):double> +-- !query 121 output +0.0 + + +-- !query 122 +SELECT cast(1 as double) % '1' FROM t +-- !query 122 schema +struct<(CAST(1 AS DOUBLE) % CAST(1 AS DOUBLE)):double> +-- !query 122 output +0.0 + + +-- !query 123 +SELECT cast(1 as decimal(10, 0)) % '1' FROM t +-- !query 123 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) % CAST(1 AS DOUBLE)):double> +-- !query 123 output +0.0 + + +-- !query 124 +SELECT cast('1' as binary) % '1' FROM t +-- !query 124 schema +struct<> +-- !query 124 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('1' AS BINARY) % CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) % CAST('1' AS DOUBLE))' (binary and double).; line 1 pos 7 + + +-- !query 125 +SELECT cast(1 as boolean) % '1' FROM t +-- !query 125 schema +struct<> +-- !query 125 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST(1 AS BOOLEAN) % CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) % CAST('1' AS DOUBLE))' (boolean and double).; line 1 pos 7 + + +-- !query 126 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) % '1' FROM t +-- !query 126 schema +struct<> +-- !query 126 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) % CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) % CAST('1' AS DOUBLE))' (timestamp and double).; line 1 pos 7 + + +-- !query 127 +SELECT cast('2017-12-11 09:30:00' as date) % '1' FROM t +-- !query 127 schema +struct<> +-- !query 127 output +org.apache.spark.sql.AnalysisException +cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) % CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) % CAST('1' AS DOUBLE))' (date and double).; line 1 pos 7 + + +-- !query 128 +SELECT pmod(cast(1 as tinyint), '1') FROM t +-- !query 128 schema +struct<pmod(CAST(CAST(1 AS TINYINT) AS DOUBLE), CAST(1 AS DOUBLE)):double> +-- !query 128 output +0.0 + + +-- !query 129 +SELECT pmod(cast(1 as smallint), '1') FROM t +-- !query 129 schema +struct<pmod(CAST(CAST(1 AS SMALLINT) AS DOUBLE), CAST(1 AS DOUBLE)):double> +-- !query 129 output +0.0 + + +-- !query 130 +SELECT pmod(cast(1 as int), '1') FROM t +-- !query 130 schema +struct<pmod(CAST(CAST(1 AS INT) AS DOUBLE), CAST(1 AS DOUBLE)):double> +-- !query 130 output +0.0 + + +-- !query 131 +SELECT pmod(cast(1 as bigint), '1') FROM t +-- !query 131 schema +struct<pmod(CAST(CAST(1 AS BIGINT) AS DOUBLE), CAST(1 AS DOUBLE)):double> +-- !query 131 output +0.0 + + +-- !query 132 +SELECT pmod(cast(1 as float), '1') FROM t +-- !query 132 schema +struct<pmod(CAST(CAST(1 AS FLOAT) AS DOUBLE), CAST(1 AS DOUBLE)):double> +-- !query 132 output +0.0 + + +-- !query 133 +SELECT pmod(cast(1 as double), '1') FROM t +-- !query 133 schema +struct<pmod(CAST(1 AS DOUBLE), CAST(1 AS DOUBLE)):double> +-- !query 133 output +0.0 + + +-- !query 134 +SELECT pmod(cast(1 as decimal(10, 0)), '1') FROM t +-- !query 134 schema +struct<pmod(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE), CAST(1 AS DOUBLE)):double> +-- !query 134 output +0.0 + + +-- !query 135 +SELECT pmod(cast('1' as binary), '1') FROM t +-- !query 135 schema +struct<> +-- !query 135 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST('1' AS BINARY), CAST('1' AS DOUBLE))' due to data type mismatch: differing types in 'pmod(CAST('1' AS BINARY), CAST('1' AS DOUBLE))' (binary and double).; line 1 pos 7 + + +-- !query 136 +SELECT pmod(cast(1 as boolean), '1') FROM t +-- !query 136 schema
+struct<> +-- !query 136 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST(1 AS BOOLEAN), CAST('1' AS DOUBLE))' due to data type mismatch: differing types in 'pmod(CAST(1 AS BOOLEAN), CAST('1' AS DOUBLE))' (boolean and double).; line 1 pos 7 + + +-- !query 137 +SELECT pmod(cast('2017-12-11 09:30:00.0' as timestamp), '1') FROM t +-- !query 137 schema +struct<> +-- !query 137 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST('1' AS DOUBLE))' due to data type mismatch: differing types in 'pmod(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST('1' AS DOUBLE))' (timestamp and double).; line 1 pos 7 + + +-- !query 138 +SELECT pmod(cast('2017-12-11 09:30:00' as date), '1') FROM t +-- !query 138 schema +struct<> +-- !query 138 output +org.apache.spark.sql.AnalysisException +cannot resolve 'pmod(CAST('2017-12-11 09:30:00' AS DATE), CAST('1' AS DOUBLE))' due to data type mismatch: differing types in 'pmod(CAST('2017-12-11 09:30:00' AS DATE), CAST('1' AS DOUBLE))' (date and double).; line 1 pos 7 + + +-- !query 139 +SELECT '1' = cast(1 as tinyint) FROM t +-- !query 139 schema +struct<(CAST(1 AS TINYINT) = CAST(1 AS TINYINT)):boolean> +-- !query 139 output +true + + +-- !query 140 +SELECT '1' = cast(1 as smallint) FROM t +-- !query 140 schema +struct<(CAST(1 AS SMALLINT) = CAST(1 AS SMALLINT)):boolean> +-- !query 140 output +true + + +-- !query 141 +SELECT '1' = cast(1 as int) FROM t +-- !query 141 schema +struct<(CAST(1 AS INT) = CAST(1 AS INT)):boolean> +-- !query 141 output +true + + +-- !query 142 +SELECT '1' = cast(1 as bigint) FROM t +-- !query 142 schema +struct<(CAST(1 AS BIGINT) = CAST(1 AS BIGINT)):boolean> +-- !query 142 output +true + + +-- !query 143 +SELECT '1' = cast(1 as float) FROM t +-- !query 143 schema +struct<(CAST(1 AS FLOAT) = CAST(1 AS FLOAT)):boolean> +-- !query 143 output +true + + +-- !query 144 +SELECT '1' = cast(1 as double) FROM t +-- !query 144 schema +struct<(CAST(1 AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> +-- !query 144 output +true + + +-- !query 145 +SELECT '1' = cast(1 as decimal(10, 0)) FROM t +-- !query 145 schema +struct<(CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 145 output +true + + +-- !query 146 +SELECT '1' = '1' FROM t +-- !query 146 schema +struct<(1 = 1):boolean> +-- !query 146 output +true + + +-- !query 147 +SELECT '1' = cast('1' as binary) FROM t +-- !query 147 schema +struct<(CAST(1 AS BINARY) = CAST(1 AS BINARY)):boolean> +-- !query 147 output +true + + +-- !query 148 +SELECT '1' = cast(1 as boolean) FROM t +-- !query 148 schema +struct<(CAST(1 AS BOOLEAN) = CAST(1 AS BOOLEAN)):boolean> +-- !query 148 output +true + + +-- !query 149 +SELECT '1' = cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 149 schema +struct<(CAST(1 AS TIMESTAMP) = CAST(2017-12-11 09:30:00.0 AS TIMESTAMP)):boolean> +-- !query 149 output +NULL + + +-- !query 150 +SELECT '1' = cast('2017-12-11 09:30:00' as date) FROM t +-- !query 150 schema +struct<(1 = CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING)):boolean> +-- !query 150 output +false + + +-- !query 151 +SELECT cast(1 as tinyint) = '1' FROM t +-- !query 151 schema +struct<(CAST(1 AS TINYINT) = CAST(1 AS TINYINT)):boolean> +-- !query 151 output +true + + +-- !query 152 +SELECT cast(1 as smallint) = '1' FROM t +-- !query 152 schema +struct<(CAST(1 AS SMALLINT) = CAST(1 AS SMALLINT)):boolean> +-- !query 152 output +true + + +-- !query 153 +SELECT cast(1 as int) = '1' FROM t +-- !query 153 
schema +struct<(CAST(1 AS INT) = CAST(1 AS INT)):boolean> +-- !query 153 output +true + + +-- !query 154 +SELECT cast(1 as bigint) = '1' FROM t +-- !query 154 schema +struct<(CAST(1 AS BIGINT) = CAST(1 AS BIGINT)):boolean> +-- !query 154 output +true + + +-- !query 155 +SELECT cast(1 as float) = '1' FROM t +-- !query 155 schema +struct<(CAST(1 AS FLOAT) = CAST(1 AS FLOAT)):boolean> +-- !query 155 output +true + + +-- !query 156 +SELECT cast(1 as double) = '1' FROM t +-- !query 156 schema +struct<(CAST(1 AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> +-- !query 156 output +true + + +-- !query 157 +SELECT cast(1 as decimal(10, 0)) = '1' FROM t +-- !query 157 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> +-- !query 157 output +true + + +-- !query 158 +SELECT cast('1' as binary) = '1' FROM t +-- !query 158 schema +struct<(CAST(1 AS BINARY) = CAST(1 AS BINARY)):boolean> +-- !query 158 output +true + + +-- !query 159 +SELECT cast(1 as boolean) = '1' FROM t +-- !query 159 schema +struct<(CAST(1 AS BOOLEAN) = CAST(1 AS BOOLEAN)):boolean> +-- !query 159 output +true + + +-- !query 160 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) = '1' FROM t +-- !query 160 schema +struct<(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) = CAST(1 AS TIMESTAMP)):boolean> +-- !query 160 output +NULL + + +-- !query 161 +SELECT cast('2017-12-11 09:30:00' as date) = '1' FROM t +-- !query 161 schema +struct<(CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING) = 1):boolean> +-- !query 161 output +false + + +-- !query 162 +SELECT '1' <=> cast(1 as tinyint) FROM t +-- !query 162 schema +struct<(CAST(1 AS TINYINT) <=> CAST(1 AS TINYINT)):boolean> +-- !query 162 output +true + + +-- !query 163 +SELECT '1' <=> cast(1 as smallint) FROM t +-- !query 163 schema +struct<(CAST(1 AS SMALLINT) <=> CAST(1 AS SMALLINT)):boolean> +-- !query 163 output +true + + +-- !query 164 +SELECT '1' <=> cast(1 as int) FROM t +-- !query 164 schema +struct<(CAST(1 AS INT) <=> CAST(1 AS INT)):boolean> +-- !query 164 output +true + + +-- !query 165 +SELECT '1' <=> cast(1 as bigint) FROM t +-- !query 165 schema +struct<(CAST(1 AS BIGINT) <=> CAST(1 AS BIGINT)):boolean> +-- !query 165 output +true + + +-- !query 166 +SELECT '1' <=> cast(1 as float) FROM t +-- !query 166 schema +struct<(CAST(1 AS FLOAT) <=> CAST(1 AS FLOAT)):boolean> +-- !query 166 output +true + + +-- !query 167 +SELECT '1' <=> cast(1 as double) FROM t +-- !query 167 schema +struct<(CAST(1 AS DOUBLE) <=> CAST(1 AS DOUBLE)):boolean> +-- !query 167 output +true + + +-- !query 168 +SELECT '1' <=> cast(1 as decimal(10, 0)) FROM t +-- !query 168 schema +struct<(CAST(1 AS DOUBLE) <=> CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 168 output +true + + +-- !query 169 +SELECT '1' <=> '1' FROM t +-- !query 169 schema +struct<(1 <=> 1):boolean> +-- !query 169 output +true + + +-- !query 170 +SELECT '1' <=> cast('1' as binary) FROM t +-- !query 170 schema +struct<(CAST(1 AS BINARY) <=> CAST(1 AS BINARY)):boolean> +-- !query 170 output +true + + +-- !query 171 +SELECT '1' <=> cast(1 as boolean) FROM t +-- !query 171 schema +struct<(CAST(1 AS BOOLEAN) <=> CAST(1 AS BOOLEAN)):boolean> +-- !query 171 output +true + + +-- !query 172 +SELECT '1' <=> cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 172 schema +struct<(CAST(1 AS TIMESTAMP) <=> CAST(2017-12-11 09:30:00.0 AS TIMESTAMP)):boolean> +-- !query 172 output +false + + +-- !query 173 +SELECT '1' <=> cast('2017-12-11 09:30:00' as date) FROM t +-- !query 173 schema +struct<(1 <=> 
CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING)):boolean> +-- !query 173 output +false + + +-- !query 174 +SELECT cast(1 as tinyint) <=> '1' FROM t +-- !query 174 schema +struct<(CAST(1 AS TINYINT) <=> CAST(1 AS TINYINT)):boolean> +-- !query 174 output +true + + +-- !query 175 +SELECT cast(1 as smallint) <=> '1' FROM t +-- !query 175 schema +struct<(CAST(1 AS SMALLINT) <=> CAST(1 AS SMALLINT)):boolean> +-- !query 175 output +true + + +-- !query 176 +SELECT cast(1 as int) <=> '1' FROM t +-- !query 176 schema +struct<(CAST(1 AS INT) <=> CAST(1 AS INT)):boolean> +-- !query 176 output +true + + +-- !query 177 +SELECT cast(1 as bigint) <=> '1' FROM t +-- !query 177 schema +struct<(CAST(1 AS BIGINT) <=> CAST(1 AS BIGINT)):boolean> +-- !query 177 output +true + + +-- !query 178 +SELECT cast(1 as float) <=> '1' FROM t +-- !query 178 schema +struct<(CAST(1 AS FLOAT) <=> CAST(1 AS FLOAT)):boolean> +-- !query 178 output +true + + +-- !query 179 +SELECT cast(1 as double) <=> '1' FROM t +-- !query 179 schema +struct<(CAST(1 AS DOUBLE) <=> CAST(1 AS DOUBLE)):boolean> +-- !query 179 output +true + + +-- !query 180 +SELECT cast(1 as decimal(10, 0)) <=> '1' FROM t +-- !query 180 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) <=> CAST(1 AS DOUBLE)):boolean> +-- !query 180 output +true + + +-- !query 181 +SELECT cast('1' as binary) <=> '1' FROM t +-- !query 181 schema +struct<(CAST(1 AS BINARY) <=> CAST(1 AS BINARY)):boolean> +-- !query 181 output +true + + +-- !query 182 +SELECT cast(1 as boolean) <=> '1' FROM t +-- !query 182 schema +struct<(CAST(1 AS BOOLEAN) <=> CAST(1 AS BOOLEAN)):boolean> +-- !query 182 output +true + + +-- !query 183 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <=> '1' FROM t +-- !query 183 schema +struct<(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) <=> CAST(1 AS TIMESTAMP)):boolean> +-- !query 183 output +false + + +-- !query 184 +SELECT cast('2017-12-11 09:30:00' as date) <=> '1' FROM t +-- !query 184 schema +struct<(CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING) <=> 1):boolean> +-- !query 184 output +false + + +-- !query 185 +SELECT '1' < cast(1 as tinyint) FROM t +-- !query 185 schema +struct<(CAST(1 AS TINYINT) < CAST(1 AS TINYINT)):boolean> +-- !query 185 output +false + + +-- !query 186 +SELECT '1' < cast(1 as smallint) FROM t +-- !query 186 schema +struct<(CAST(1 AS SMALLINT) < CAST(1 AS SMALLINT)):boolean> +-- !query 186 output +false + + +-- !query 187 +SELECT '1' < cast(1 as int) FROM t +-- !query 187 schema +struct<(CAST(1 AS INT) < CAST(1 AS INT)):boolean> +-- !query 187 output +false + + +-- !query 188 +SELECT '1' < cast(1 as bigint) FROM t +-- !query 188 schema +struct<(CAST(1 AS BIGINT) < CAST(1 AS BIGINT)):boolean> +-- !query 188 output +false + + +-- !query 189 +SELECT '1' < cast(1 as float) FROM t +-- !query 189 schema +struct<(CAST(1 AS FLOAT) < CAST(1 AS FLOAT)):boolean> +-- !query 189 output +false + + +-- !query 190 +SELECT '1' < cast(1 as double) FROM t +-- !query 190 schema +struct<(CAST(1 AS DOUBLE) < CAST(1 AS DOUBLE)):boolean> +-- !query 190 output +false + + +-- !query 191 +SELECT '1' < cast(1 as decimal(10, 0)) FROM t +-- !query 191 schema +struct<(CAST(1 AS DOUBLE) < CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 191 output +false + + +-- !query 192 +SELECT '1' < '1' FROM t +-- !query 192 schema +struct<(1 < 1):boolean> +-- !query 192 output +false + + +-- !query 193 +SELECT '1' < cast('1' as binary) FROM t +-- !query 193 schema +struct<(CAST(1 AS BINARY) < CAST(1 AS BINARY)):boolean> +-- !query 193 output +false + + +-- 
!query 194 +SELECT '1' < cast(1 as boolean) FROM t +-- !query 194 schema +struct<(CAST(1 AS BOOLEAN) < CAST(1 AS BOOLEAN)):boolean> +-- !query 194 output +false + + +-- !query 195 +SELECT '1' < cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 195 schema +struct<(1 < CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING)):boolean> +-- !query 195 output +true + + +-- !query 196 +SELECT '1' < cast('2017-12-11 09:30:00' as date) FROM t +-- !query 196 schema +struct<(1 < CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING)):boolean> +-- !query 196 output +true + + +-- !query 197 +SELECT '1' <= cast(1 as tinyint) FROM t +-- !query 197 schema +struct<(CAST(1 AS TINYINT) <= CAST(1 AS TINYINT)):boolean> +-- !query 197 output +true + + +-- !query 198 +SELECT '1' <= cast(1 as smallint) FROM t +-- !query 198 schema +struct<(CAST(1 AS SMALLINT) <= CAST(1 AS SMALLINT)):boolean> +-- !query 198 output +true + + +-- !query 199 +SELECT '1' <= cast(1 as int) FROM t +-- !query 199 schema +struct<(CAST(1 AS INT) <= CAST(1 AS INT)):boolean> +-- !query 199 output +true + + +-- !query 200 +SELECT '1' <= cast(1 as bigint) FROM t +-- !query 200 schema +struct<(CAST(1 AS BIGINT) <= CAST(1 AS BIGINT)):boolean> +-- !query 200 output +true + + +-- !query 201 +SELECT '1' <= cast(1 as float) FROM t +-- !query 201 schema +struct<(CAST(1 AS FLOAT) <= CAST(1 AS FLOAT)):boolean> +-- !query 201 output +true + + +-- !query 202 +SELECT '1' <= cast(1 as double) FROM t +-- !query 202 schema +struct<(CAST(1 AS DOUBLE) <= CAST(1 AS DOUBLE)):boolean> +-- !query 202 output +true + + +-- !query 203 +SELECT '1' <= cast(1 as decimal(10, 0)) FROM t +-- !query 203 schema +struct<(CAST(1 AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 203 output +true + + +-- !query 204 +SELECT '1' <= '1' FROM t +-- !query 204 schema +struct<(1 <= 1):boolean> +-- !query 204 output +true + + +-- !query 205 +SELECT '1' <= cast('1' as binary) FROM t +-- !query 205 schema +struct<(CAST(1 AS BINARY) <= CAST(1 AS BINARY)):boolean> +-- !query 205 output +true + + +-- !query 206 +SELECT '1' <= cast(1 as boolean) FROM t +-- !query 206 schema +struct<(CAST(1 AS BOOLEAN) <= CAST(1 AS BOOLEAN)):boolean> +-- !query 206 output +true + + +-- !query 207 +SELECT '1' <= cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 207 schema +struct<(1 <= CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING)):boolean> +-- !query 207 output +true + + +-- !query 208 +SELECT '1' <= cast('2017-12-11 09:30:00' as date) FROM t +-- !query 208 schema +struct<(1 <= CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING)):boolean> +-- !query 208 output +true + + +-- !query 209 +SELECT '1' > cast(1 as tinyint) FROM t +-- !query 209 schema +struct<(CAST(1 AS TINYINT) > CAST(1 AS TINYINT)):boolean> +-- !query 209 output +false + + +-- !query 210 +SELECT '1' > cast(1 as smallint) FROM t +-- !query 210 schema +struct<(CAST(1 AS SMALLINT) > CAST(1 AS SMALLINT)):boolean> +-- !query 210 output +false + + +-- !query 211 +SELECT '1' > cast(1 as int) FROM t +-- !query 211 schema +struct<(CAST(1 AS INT) > CAST(1 AS INT)):boolean> +-- !query 211 output +false + + +-- !query 212 +SELECT '1' > cast(1 as bigint) FROM t +-- !query 212 schema +struct<(CAST(1 AS BIGINT) > CAST(1 AS BIGINT)):boolean> +-- !query 212 output +false + + +-- !query 213 +SELECT '1' > cast(1 as float) FROM t +-- !query 213 schema +struct<(CAST(1 AS FLOAT) > CAST(1 AS FLOAT)):boolean> +-- !query 213 output +false + + +-- !query 214 +SELECT '1' > cast(1 as double) FROM t +-- !query 214 schema 
+struct<(CAST(1 AS DOUBLE) > CAST(1 AS DOUBLE)):boolean> +-- !query 214 output +false + + +-- !query 215 +SELECT '1' > cast(1 as decimal(10, 0)) FROM t +-- !query 215 schema +struct<(CAST(1 AS DOUBLE) > CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 215 output +false + + +-- !query 216 +SELECT '1' > '1' FROM t +-- !query 216 schema +struct<(1 > 1):boolean> +-- !query 216 output +false + + +-- !query 217 +SELECT '1' > cast('1' as binary) FROM t +-- !query 217 schema +struct<(CAST(1 AS BINARY) > CAST(1 AS BINARY)):boolean> +-- !query 217 output +false + + +-- !query 218 +SELECT '1' > cast(1 as boolean) FROM t +-- !query 218 schema +struct<(CAST(1 AS BOOLEAN) > CAST(1 AS BOOLEAN)):boolean> +-- !query 218 output +false + + +-- !query 219 +SELECT '1' > cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 219 schema +struct<(1 > CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING)):boolean> +-- !query 219 output +false + + +-- !query 220 +SELECT '1' > cast('2017-12-11 09:30:00' as date) FROM t +-- !query 220 schema +struct<(1 > CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING)):boolean> +-- !query 220 output +false + + +-- !query 221 +SELECT '1' >= cast(1 as tinyint) FROM t +-- !query 221 schema +struct<(CAST(1 AS TINYINT) >= CAST(1 AS TINYINT)):boolean> +-- !query 221 output +true + + +-- !query 222 +SELECT '1' >= cast(1 as smallint) FROM t +-- !query 222 schema +struct<(CAST(1 AS SMALLINT) >= CAST(1 AS SMALLINT)):boolean> +-- !query 222 output +true + + +-- !query 223 +SELECT '1' >= cast(1 as int) FROM t +-- !query 223 schema +struct<(CAST(1 AS INT) >= CAST(1 AS INT)):boolean> +-- !query 223 output +true + + +-- !query 224 +SELECT '1' >= cast(1 as bigint) FROM t +-- !query 224 schema +struct<(CAST(1 AS BIGINT) >= CAST(1 AS BIGINT)):boolean> +-- !query 224 output +true + + +-- !query 225 +SELECT '1' >= cast(1 as float) FROM t +-- !query 225 schema +struct<(CAST(1 AS FLOAT) >= CAST(1 AS FLOAT)):boolean> +-- !query 225 output +true + + +-- !query 226 +SELECT '1' >= cast(1 as double) FROM t +-- !query 226 schema +struct<(CAST(1 AS DOUBLE) >= CAST(1 AS DOUBLE)):boolean> +-- !query 226 output +true + + +-- !query 227 +SELECT '1' >= cast(1 as decimal(10, 0)) FROM t +-- !query 227 schema +struct<(CAST(1 AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> +-- !query 227 output +true + + +-- !query 228 +SELECT '1' >= '1' FROM t +-- !query 228 schema +struct<(1 >= 1):boolean> +-- !query 228 output +true + + +-- !query 229 +SELECT '1' >= cast('1' as binary) FROM t +-- !query 229 schema +struct<(CAST(1 AS BINARY) >= CAST(1 AS BINARY)):boolean> +-- !query 229 output +true + + +-- !query 230 +SELECT '1' >= cast(1 as boolean) FROM t +-- !query 230 schema +struct<(CAST(1 AS BOOLEAN) >= CAST(1 AS BOOLEAN)):boolean> +-- !query 230 output +true + + +-- !query 231 +SELECT '1' >= cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 231 schema +struct<(1 >= CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING)):boolean> +-- !query 231 output +false + + +-- !query 232 +SELECT '1' >= cast('2017-12-11 09:30:00' as date) FROM t +-- !query 232 schema +struct<(1 >= CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING)):boolean> +-- !query 232 output +false + + +-- !query 233 +SELECT '1' <> cast(1 as tinyint) FROM t +-- !query 233 schema +struct<(NOT (CAST(1 AS TINYINT) = CAST(1 AS TINYINT))):boolean> +-- !query 233 output +false + + +-- !query 234 +SELECT '1' <> cast(1 as smallint) FROM t +-- !query 234 schema +struct<(NOT (CAST(1 AS SMALLINT) = CAST(1 AS SMALLINT))):boolean> 
+-- !query 234 output +false + + +-- !query 235 +SELECT '1' <> cast(1 as int) FROM t +-- !query 235 schema +struct<(NOT (CAST(1 AS INT) = CAST(1 AS INT))):boolean> +-- !query 235 output +false + + +-- !query 236 +SELECT '1' <> cast(1 as bigint) FROM t +-- !query 236 schema +struct<(NOT (CAST(1 AS BIGINT) = CAST(1 AS BIGINT))):boolean> +-- !query 236 output +false + + +-- !query 237 +SELECT '1' <> cast(1 as float) FROM t +-- !query 237 schema +struct<(NOT (CAST(1 AS FLOAT) = CAST(1 AS FLOAT))):boolean> +-- !query 237 output +false + + +-- !query 238 +SELECT '1' <> cast(1 as double) FROM t +-- !query 238 schema +struct<(NOT (CAST(1 AS DOUBLE) = CAST(1 AS DOUBLE))):boolean> +-- !query 238 output +false + + +-- !query 239 +SELECT '1' <> cast(1 as decimal(10, 0)) FROM t +-- !query 239 schema +struct<(NOT (CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> +-- !query 239 output +false + + +-- !query 240 +SELECT '1' <> '1' FROM t +-- !query 240 schema +struct<(NOT (1 = 1)):boolean> +-- !query 240 output +false + + +-- !query 241 +SELECT '1' <> cast('1' as binary) FROM t +-- !query 241 schema +struct<(NOT (CAST(1 AS BINARY) = CAST(1 AS BINARY))):boolean> +-- !query 241 output +false + + +-- !query 242 +SELECT '1' <> cast(1 as boolean) FROM t +-- !query 242 schema +struct<(NOT (CAST(1 AS BOOLEAN) = CAST(1 AS BOOLEAN))):boolean> +-- !query 242 output +false + + +-- !query 243 +SELECT '1' <> cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 243 schema +struct<(NOT (CAST(1 AS TIMESTAMP) = CAST(2017-12-11 09:30:00.0 AS TIMESTAMP))):boolean> +-- !query 243 output +NULL + + +-- !query 244 +SELECT '1' <> cast('2017-12-11 09:30:00' as date) FROM t +-- !query 244 schema +struct<(NOT (1 = CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING))):boolean> +-- !query 244 output +true + + +-- !query 245 +SELECT cast(1 as tinyint) < '1' FROM t +-- !query 245 schema +struct<(CAST(1 AS TINYINT) < CAST(1 AS TINYINT)):boolean> +-- !query 245 output +false + + +-- !query 246 +SELECT cast(1 as smallint) < '1' FROM t +-- !query 246 schema +struct<(CAST(1 AS SMALLINT) < CAST(1 AS SMALLINT)):boolean> +-- !query 246 output +false + + +-- !query 247 +SELECT cast(1 as int) < '1' FROM t +-- !query 247 schema +struct<(CAST(1 AS INT) < CAST(1 AS INT)):boolean> +-- !query 247 output +false + + +-- !query 248 +SELECT cast(1 as bigint) < '1' FROM t +-- !query 248 schema +struct<(CAST(1 AS BIGINT) < CAST(1 AS BIGINT)):boolean> +-- !query 248 output +false + + +-- !query 249 +SELECT cast(1 as float) < '1' FROM t +-- !query 249 schema +struct<(CAST(1 AS FLOAT) < CAST(1 AS FLOAT)):boolean> +-- !query 249 output +false + + +-- !query 250 +SELECT cast(1 as double) < '1' FROM t +-- !query 250 schema +struct<(CAST(1 AS DOUBLE) < CAST(1 AS DOUBLE)):boolean> +-- !query 250 output +false + + +-- !query 251 +SELECT cast(1 as decimal(10, 0)) < '1' FROM t +-- !query 251 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) < CAST(1 AS DOUBLE)):boolean> +-- !query 251 output +false + + +-- !query 252 +SELECT '1' < '1' FROM t +-- !query 252 schema +struct<(1 < 1):boolean> +-- !query 252 output +false + + +-- !query 253 +SELECT cast('1' as binary) < '1' FROM t +-- !query 253 schema +struct<(CAST(1 AS BINARY) < CAST(1 AS BINARY)):boolean> +-- !query 253 output +false + + +-- !query 254 +SELECT cast(1 as boolean) < '1' FROM t +-- !query 254 schema +struct<(CAST(1 AS BOOLEAN) < CAST(1 AS BOOLEAN)):boolean> +-- !query 254 output +false + + +-- !query 255 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) < '1' FROM t +-- 
!query 255 schema +struct<(CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING) < 1):boolean> +-- !query 255 output +false + + +-- !query 256 +SELECT cast('2017-12-11 09:30:00' as date) < '1' FROM t +-- !query 256 schema +struct<(CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING) < 1):boolean> +-- !query 256 output +false + + +-- !query 257 +SELECT cast(1 as tinyint) <= '1' FROM t +-- !query 257 schema +struct<(CAST(1 AS TINYINT) <= CAST(1 AS TINYINT)):boolean> +-- !query 257 output +true + + +-- !query 258 +SELECT cast(1 as smallint) <= '1' FROM t +-- !query 258 schema +struct<(CAST(1 AS SMALLINT) <= CAST(1 AS SMALLINT)):boolean> +-- !query 258 output +true + + +-- !query 259 +SELECT cast(1 as int) <= '1' FROM t +-- !query 259 schema +struct<(CAST(1 AS INT) <= CAST(1 AS INT)):boolean> +-- !query 259 output +true + + +-- !query 260 +SELECT cast(1 as bigint) <= '1' FROM t +-- !query 260 schema +struct<(CAST(1 AS BIGINT) <= CAST(1 AS BIGINT)):boolean> +-- !query 260 output +true + + +-- !query 261 +SELECT cast(1 as float) <= '1' FROM t +-- !query 261 schema +struct<(CAST(1 AS FLOAT) <= CAST(1 AS FLOAT)):boolean> +-- !query 261 output +true + + +-- !query 262 +SELECT cast(1 as double) <= '1' FROM t +-- !query 262 schema +struct<(CAST(1 AS DOUBLE) <= CAST(1 AS DOUBLE)):boolean> +-- !query 262 output +true + + +-- !query 263 +SELECT cast(1 as decimal(10, 0)) <= '1' FROM t +-- !query 263 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) <= CAST(1 AS DOUBLE)):boolean> +-- !query 263 output +true + + +-- !query 264 +SELECT '1' <= '1' FROM t +-- !query 264 schema +struct<(1 <= 1):boolean> +-- !query 264 output +true + + +-- !query 265 +SELECT cast('1' as binary) <= '1' FROM t +-- !query 265 schema +struct<(CAST(1 AS BINARY) <= CAST(1 AS BINARY)):boolean> +-- !query 265 output +true + + +-- !query 266 +SELECT cast(1 as boolean) <= '1' FROM t +-- !query 266 schema +struct<(CAST(1 AS BOOLEAN) <= CAST(1 AS BOOLEAN)):boolean> +-- !query 266 output +true + + +-- !query 267 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <= '1' FROM t +-- !query 267 schema +struct<(CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING) <= 1):boolean> +-- !query 267 output +false + + +-- !query 268 +SELECT cast('2017-12-11 09:30:00' as date) <= '1' FROM t +-- !query 268 schema +struct<(CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING) <= 1):boolean> +-- !query 268 output +false + + +-- !query 269 +SELECT cast(1 as tinyint) > '1' FROM t +-- !query 269 schema +struct<(CAST(1 AS TINYINT) > CAST(1 AS TINYINT)):boolean> +-- !query 269 output +false + + +-- !query 270 +SELECT cast(1 as smallint) > '1' FROM t +-- !query 270 schema +struct<(CAST(1 AS SMALLINT) > CAST(1 AS SMALLINT)):boolean> +-- !query 270 output +false + + +-- !query 271 +SELECT cast(1 as int) > '1' FROM t +-- !query 271 schema +struct<(CAST(1 AS INT) > CAST(1 AS INT)):boolean> +-- !query 271 output +false + + +-- !query 272 +SELECT cast(1 as bigint) > '1' FROM t +-- !query 272 schema +struct<(CAST(1 AS BIGINT) > CAST(1 AS BIGINT)):boolean> +-- !query 272 output +false + + +-- !query 273 +SELECT cast(1 as float) > '1' FROM t +-- !query 273 schema +struct<(CAST(1 AS FLOAT) > CAST(1 AS FLOAT)):boolean> +-- !query 273 output +false + + +-- !query 274 +SELECT cast(1 as double) > '1' FROM t +-- !query 274 schema +struct<(CAST(1 AS DOUBLE) > CAST(1 AS DOUBLE)):boolean> +-- !query 274 output +false + + +-- !query 275 +SELECT cast(1 as decimal(10, 0)) > '1' FROM t +-- !query 275 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) > CAST(1 AS 
DOUBLE)):boolean> +-- !query 275 output +false + + +-- !query 276 +SELECT '1' > '1' FROM t +-- !query 276 schema +struct<(1 > 1):boolean> +-- !query 276 output +false + + +-- !query 277 +SELECT cast('1' as binary) > '1' FROM t +-- !query 277 schema +struct<(CAST(1 AS BINARY) > CAST(1 AS BINARY)):boolean> +-- !query 277 output +false + + +-- !query 278 +SELECT cast(1 as boolean) > '1' FROM t +-- !query 278 schema +struct<(CAST(1 AS BOOLEAN) > CAST(1 AS BOOLEAN)):boolean> +-- !query 278 output +false + + +-- !query 279 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) > '1' FROM t +-- !query 279 schema +struct<(CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING) > 1):boolean> +-- !query 279 output +true + + +-- !query 280 +SELECT cast('2017-12-11 09:30:00' as date) > '1' FROM t +-- !query 280 schema +struct<(CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING) > 1):boolean> +-- !query 280 output +true + + +-- !query 281 +SELECT cast(1 as tinyint) >= '1' FROM t +-- !query 281 schema +struct<(CAST(1 AS TINYINT) >= CAST(1 AS TINYINT)):boolean> +-- !query 281 output +true + + +-- !query 282 +SELECT cast(1 as smallint) >= '1' FROM t +-- !query 282 schema +struct<(CAST(1 AS SMALLINT) >= CAST(1 AS SMALLINT)):boolean> +-- !query 282 output +true + + +-- !query 283 +SELECT cast(1 as int) >= '1' FROM t +-- !query 283 schema +struct<(CAST(1 AS INT) >= CAST(1 AS INT)):boolean> +-- !query 283 output +true + + +-- !query 284 +SELECT cast(1 as bigint) >= '1' FROM t +-- !query 284 schema +struct<(CAST(1 AS BIGINT) >= CAST(1 AS BIGINT)):boolean> +-- !query 284 output +true + + +-- !query 285 +SELECT cast(1 as float) >= '1' FROM t +-- !query 285 schema +struct<(CAST(1 AS FLOAT) >= CAST(1 AS FLOAT)):boolean> +-- !query 285 output +true + + +-- !query 286 +SELECT cast(1 as double) >= '1' FROM t +-- !query 286 schema +struct<(CAST(1 AS DOUBLE) >= CAST(1 AS DOUBLE)):boolean> +-- !query 286 output +true + + +-- !query 287 +SELECT cast(1 as decimal(10, 0)) >= '1' FROM t +-- !query 287 schema +struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) >= CAST(1 AS DOUBLE)):boolean> +-- !query 287 output +true + + +-- !query 288 +SELECT '1' >= '1' FROM t +-- !query 288 schema +struct<(1 >= 1):boolean> +-- !query 288 output +true + + +-- !query 289 +SELECT cast('1' as binary) >= '1' FROM t +-- !query 289 schema +struct<(CAST(1 AS BINARY) >= CAST(1 AS BINARY)):boolean> +-- !query 289 output +true + + +-- !query 290 +SELECT cast(1 as boolean) >= '1' FROM t +-- !query 290 schema +struct<(CAST(1 AS BOOLEAN) >= CAST(1 AS BOOLEAN)):boolean> +-- !query 290 output +true + + +-- !query 291 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) >= '1' FROM t +-- !query 291 schema +struct<(CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING) >= 1):boolean> +-- !query 291 output +true + + +-- !query 292 +SELECT cast('2017-12-11 09:30:00' as date) >= '1' FROM t +-- !query 292 schema +struct<(CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING) >= 1):boolean> +-- !query 292 output +true + + +-- !query 293 +SELECT cast(1 as tinyint) <> '1' FROM t +-- !query 293 schema +struct<(NOT (CAST(1 AS TINYINT) = CAST(1 AS TINYINT))):boolean> +-- !query 293 output +false + + +-- !query 294 +SELECT cast(1 as smallint) <> '1' FROM t +-- !query 294 schema +struct<(NOT (CAST(1 AS SMALLINT) = CAST(1 AS SMALLINT))):boolean> +-- !query 294 output +false + + +-- !query 295 +SELECT cast(1 as int) <> '1' FROM t +-- !query 295 schema +struct<(NOT (CAST(1 AS INT) = CAST(1 AS INT))):boolean> +-- !query 295 output +false + + +-- !query 296 +SELECT cast(1 as bigint) 
<> '1' FROM t +-- !query 296 schema +struct<(NOT (CAST(1 AS BIGINT) = CAST(1 AS BIGINT))):boolean> +-- !query 296 output +false + + +-- !query 297 +SELECT cast(1 as float) <> '1' FROM t +-- !query 297 schema +struct<(NOT (CAST(1 AS FLOAT) = CAST(1 AS FLOAT))):boolean> +-- !query 297 output +false + + +-- !query 298 +SELECT cast(1 as double) <> '1' FROM t +-- !query 298 schema +struct<(NOT (CAST(1 AS DOUBLE) = CAST(1 AS DOUBLE))):boolean> +-- !query 298 output +false + + +-- !query 299 +SELECT cast(1 as decimal(10, 0)) <> '1' FROM t +-- !query 299 schema +struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(1 AS DOUBLE))):boolean> +-- !query 299 output +false + + +-- !query 300 +SELECT '1' <> '1' FROM t +-- !query 300 schema +struct<(NOT (1 = 1)):boolean> +-- !query 300 output +false + + +-- !query 301 +SELECT cast('1' as binary) <> '1' FROM t +-- !query 301 schema +struct<(NOT (CAST(1 AS BINARY) = CAST(1 AS BINARY))):boolean> +-- !query 301 output +false + + +-- !query 302 +SELECT cast(1 as boolean) <> '1' FROM t +-- !query 302 schema +struct<(NOT (CAST(1 AS BOOLEAN) = CAST(1 AS BOOLEAN))):boolean> +-- !query 302 output +false + + +-- !query 303 +SELECT cast('2017-12-11 09:30:00.0' as timestamp) <> '1' FROM t +-- !query 303 schema +struct<(NOT (CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) = CAST(1 AS TIMESTAMP))):boolean> +-- !query 303 output +NULL + + +-- !query 304 +SELECT cast('2017-12-11 09:30:00' as date) <> '1' FROM t +-- !query 304 schema +struct<(NOT (CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING) = 1)):boolean> +-- !query 304 output +true + + +-- !query 305 +SELECT abs('1') FROM t +-- !query 305 schema +struct +-- !query 305 output +1.0 + + +-- !query 306 +SELECT sum('1') FROM t +-- !query 306 schema +struct +-- !query 306 output +1.0 + + +-- !query 307 +SELECT avg('1') FROM t +-- !query 307 schema +struct +-- !query 307 output +1.0 + + +-- !query 308 +SELECT stddev_pop('1') FROM t +-- !query 308 schema +struct +-- !query 308 output +0.0 + + +-- !query 309 +SELECT stddev_samp('1') FROM t +-- !query 309 schema +struct +-- !query 309 output +NaN + + +-- !query 310 +SELECT - '1' FROM t +-- !query 310 schema +struct<(- CAST(1 AS DOUBLE)):double> +-- !query 310 output +-1.0 + + +-- !query 311 +SELECT + '1' FROM t +-- !query 311 schema +struct<1:string> +-- !query 311 output +1 + + +-- !query 312 +SELECT var_pop('1') FROM t +-- !query 312 schema +struct +-- !query 312 output +0.0 + + +-- !query 313 +SELECT var_samp('1') FROM t +-- !query 313 schema +struct +-- !query 313 output +NaN + + +-- !query 314 +SELECT skewness('1') FROM t +-- !query 314 schema +struct +-- !query 314 output +NaN + + +-- !query 315 +SELECT kurtosis('1') FROM t +-- !query 315 schema +struct +-- !query 315 output +NaN diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/stringCastAndExpressions.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/stringCastAndExpressions.sql.out new file mode 100644 index 0000000000000..8ed2820244412 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/stringCastAndExpressions.sql.out @@ -0,0 +1,261 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 32 + + +-- !query 0 +CREATE TEMPORARY VIEW t AS SELECT 'aa' as a +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +select cast(a as byte) from t +-- !query 1 schema +struct +-- !query 1 output +NULL + + +-- !query 2 +select cast(a as short) from t +-- !query 2 schema +struct +-- !query 2 output +NULL + + 
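The string-coercion results above and the stringCastAndExpressions.sql.out results that follow document Spark's lenient string casts: casting a non-numeric string such as 'aa' to a numeric, boolean, date, or timestamp type evaluates to NULL, a cast to binary succeeds byte-wise, and only casts to complex types (array, struct, map) are rejected at analysis time. A minimal Scala sketch of that behavior, assuming a local-mode session (the object name and configuration here are illustrative, not part of this patch):

import org.apache.spark.sql.SparkSession

object StringCastSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")              // illustrative local-mode config, not from the patch
      .appName("string-cast-sketch")
      .getOrCreate()

    // Same fixture as the generated file: one row with a non-numeric string column.
    spark.sql("CREATE TEMPORARY VIEW t AS SELECT 'aa' AS a")

    // Invalid numeric and temporal casts evaluate to NULL rather than failing the query.
    spark.sql("SELECT cast(a AS int) AS i, cast(a AS date) AS d FROM t").show()

    // Casts to complex types fail at analysis time with an AnalysisException,
    // matching the errors recorded for the array/struct/map queries below.
    // spark.sql("SELECT cast(a AS map<string, string>) FROM t").show()

    spark.stop()
  }
}

Both columns of the first SELECT print as null, matching the NULL outputs recorded in these golden files.
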
+-- !query 3 +select cast(a as int) from t +-- !query 3 schema +struct +-- !query 3 output +NULL + + +-- !query 4 +select cast(a as long) from t +-- !query 4 schema +struct +-- !query 4 output +NULL + + +-- !query 5 +select cast(a as float) from t +-- !query 5 schema +struct +-- !query 5 output +NULL + + +-- !query 6 +select cast(a as double) from t +-- !query 6 schema +struct +-- !query 6 output +NULL + + +-- !query 7 +select cast(a as decimal) from t +-- !query 7 schema +struct +-- !query 7 output +NULL + + +-- !query 8 +select cast(a as boolean) from t +-- !query 8 schema +struct +-- !query 8 output +NULL + + +-- !query 9 +select cast(a as timestamp) from t +-- !query 9 schema +struct +-- !query 9 output +NULL + + +-- !query 10 +select cast(a as date) from t +-- !query 10 schema +struct +-- !query 10 output +NULL + + +-- !query 11 +select cast(a as binary) from t +-- !query 11 schema +struct +-- !query 11 output +aa + + +-- !query 12 +select cast(a as array) from t +-- !query 12 schema +struct<> +-- !query 12 output +org.apache.spark.sql.AnalysisException +cannot resolve 't.`a`' due to data type mismatch: cannot cast string to array; line 1 pos 7 + + +-- !query 13 +select cast(a as struct) from t +-- !query 13 schema +struct<> +-- !query 13 output +org.apache.spark.sql.AnalysisException +cannot resolve 't.`a`' due to data type mismatch: cannot cast string to struct; line 1 pos 7 + + +-- !query 14 +select cast(a as map) from t +-- !query 14 schema +struct<> +-- !query 14 output +org.apache.spark.sql.AnalysisException +cannot resolve 't.`a`' due to data type mismatch: cannot cast string to map; line 1 pos 7 + + +-- !query 15 +select to_timestamp(a) from t +-- !query 15 schema +struct +-- !query 15 output +NULL + + +-- !query 16 +select to_timestamp('2018-01-01', a) from t +-- !query 16 schema +struct +-- !query 16 output +NULL + + +-- !query 17 +select to_unix_timestamp(a) from t +-- !query 17 schema +struct +-- !query 17 output +NULL + + +-- !query 18 +select to_unix_timestamp('2018-01-01', a) from t +-- !query 18 schema +struct +-- !query 18 output +NULL + + +-- !query 19 +select unix_timestamp(a) from t +-- !query 19 schema +struct +-- !query 19 output +NULL + + +-- !query 20 +select unix_timestamp('2018-01-01', a) from t +-- !query 20 schema +struct +-- !query 20 output +NULL + + +-- !query 21 +select from_unixtime(a) from t +-- !query 21 schema +struct +-- !query 21 output +NULL + + +-- !query 22 +select from_unixtime('2018-01-01', a) from t +-- !query 22 schema +struct +-- !query 22 output +NULL + + +-- !query 23 +select next_day(a, 'MO') from t +-- !query 23 schema +struct +-- !query 23 output +NULL + + +-- !query 24 +select next_day('2018-01-01', a) from t +-- !query 24 schema +struct +-- !query 24 output +NULL + + +-- !query 25 +select trunc(a, 'MM') from t +-- !query 25 schema +struct +-- !query 25 output +NULL + + +-- !query 26 +select trunc('2018-01-01', a) from t +-- !query 26 schema +struct +-- !query 26 output +NULL + + +-- !query 27 +select unhex('-123') +-- !query 27 schema +struct +-- !query 27 output +NULL + + +-- !query 28 +select sha2(a, a) from t +-- !query 28 schema +struct +-- !query 28 output +NULL + + +-- !query 29 +select get_json_object(a, a) from t +-- !query 29 schema +struct +-- !query 29 output +NULL + + +-- !query 30 +select json_tuple(a, a) from t +-- !query 30 schema +struct +-- !query 30 output +NULL + + +-- !query 31 +select from_json(a, 'a INT') from t +-- !query 31 schema +struct> +-- !query 31 output +NULL diff --git 
a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/widenSetOperationTypes.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/widenSetOperationTypes.sql.out new file mode 100644 index 0000000000000..20a9e47217238 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/widenSetOperationTypes.sql.out @@ -0,0 +1,1305 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 145 + + +-- !query 0 +CREATE TEMPORARY VIEW t AS SELECT 1 +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as tinyint) FROM t +-- !query 1 schema +struct +-- !query 1 output +1 +2 + + +-- !query 2 +SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as smallint) FROM t +-- !query 2 schema +struct +-- !query 2 output +1 +2 + + +-- !query 3 +SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as int) FROM t +-- !query 3 schema +struct +-- !query 3 output +1 +2 + + +-- !query 4 +SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as bigint) FROM t +-- !query 4 schema +struct +-- !query 4 output +1 +2 + + +-- !query 5 +SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as float) FROM t +-- !query 5 schema +struct +-- !query 5 output +1.0 +2.0 + + +-- !query 6 +SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as double) FROM t +-- !query 6 schema +struct +-- !query 6 output +1.0 +2.0 + + +-- !query 7 +SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t +-- !query 7 schema +struct +-- !query 7 output +1 +2 + + +-- !query 8 +SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as string) FROM t +-- !query 8 schema +struct +-- !query 8 output +1 +2 + + +-- !query 9 +SELECT cast(1 as tinyint) FROM t UNION SELECT cast('2' as binary) FROM t +-- !query 9 schema +struct<> +-- !query 9 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. binary <> tinyint at the first column of the second table; + + +-- !query 10 +SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as boolean) FROM t +-- !query 10 schema +struct<> +-- !query 10 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. boolean <> tinyint at the first column of the second table; + + +-- !query 11 +SELECT cast(1 as tinyint) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 11 schema +struct<> +-- !query 11 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. timestamp <> tinyint at the first column of the second table; + + +-- !query 12 +SELECT cast(1 as tinyint) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t +-- !query 12 schema +struct<> +-- !query 12 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. 
date <> tinyint at the first column of the second table; + + +-- !query 13 +SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as tinyint) FROM t +-- !query 13 schema +struct +-- !query 13 output +1 +2 + + +-- !query 14 +SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as smallint) FROM t +-- !query 14 schema +struct +-- !query 14 output +1 +2 + + +-- !query 15 +SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as int) FROM t +-- !query 15 schema +struct +-- !query 15 output +1 +2 + + +-- !query 16 +SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as bigint) FROM t +-- !query 16 schema +struct +-- !query 16 output +1 +2 + + +-- !query 17 +SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as float) FROM t +-- !query 17 schema +struct +-- !query 17 output +1.0 +2.0 + + +-- !query 18 +SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as double) FROM t +-- !query 18 schema +struct +-- !query 18 output +1.0 +2.0 + + +-- !query 19 +SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t +-- !query 19 schema +struct +-- !query 19 output +1 +2 + + +-- !query 20 +SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as string) FROM t +-- !query 20 schema +struct +-- !query 20 output +1 +2 + + +-- !query 21 +SELECT cast(1 as smallint) FROM t UNION SELECT cast('2' as binary) FROM t +-- !query 21 schema +struct<> +-- !query 21 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. binary <> smallint at the first column of the second table; + + +-- !query 22 +SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as boolean) FROM t +-- !query 22 schema +struct<> +-- !query 22 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. boolean <> smallint at the first column of the second table; + + +-- !query 23 +SELECT cast(1 as smallint) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 23 schema +struct<> +-- !query 23 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. timestamp <> smallint at the first column of the second table; + + +-- !query 24 +SELECT cast(1 as smallint) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t +-- !query 24 schema +struct<> +-- !query 24 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. 
date <> smallint at the first column of the second table; + + +-- !query 25 +SELECT cast(1 as int) FROM t UNION SELECT cast(2 as tinyint) FROM t +-- !query 25 schema +struct +-- !query 25 output +1 +2 + + +-- !query 26 +SELECT cast(1 as int) FROM t UNION SELECT cast(2 as smallint) FROM t +-- !query 26 schema +struct +-- !query 26 output +1 +2 + + +-- !query 27 +SELECT cast(1 as int) FROM t UNION SELECT cast(2 as int) FROM t +-- !query 27 schema +struct +-- !query 27 output +1 +2 + + +-- !query 28 +SELECT cast(1 as int) FROM t UNION SELECT cast(2 as bigint) FROM t +-- !query 28 schema +struct +-- !query 28 output +1 +2 + + +-- !query 29 +SELECT cast(1 as int) FROM t UNION SELECT cast(2 as float) FROM t +-- !query 29 schema +struct +-- !query 29 output +1.0 +2.0 + + +-- !query 30 +SELECT cast(1 as int) FROM t UNION SELECT cast(2 as double) FROM t +-- !query 30 schema +struct +-- !query 30 output +1.0 +2.0 + + +-- !query 31 +SELECT cast(1 as int) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t +-- !query 31 schema +struct +-- !query 31 output +1 +2 + + +-- !query 32 +SELECT cast(1 as int) FROM t UNION SELECT cast(2 as string) FROM t +-- !query 32 schema +struct +-- !query 32 output +1 +2 + + +-- !query 33 +SELECT cast(1 as int) FROM t UNION SELECT cast('2' as binary) FROM t +-- !query 33 schema +struct<> +-- !query 33 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. binary <> int at the first column of the second table; + + +-- !query 34 +SELECT cast(1 as int) FROM t UNION SELECT cast(2 as boolean) FROM t +-- !query 34 schema +struct<> +-- !query 34 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. boolean <> int at the first column of the second table; + + +-- !query 35 +SELECT cast(1 as int) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 35 schema +struct<> +-- !query 35 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. timestamp <> int at the first column of the second table; + + +-- !query 36 +SELECT cast(1 as int) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t +-- !query 36 schema +struct<> +-- !query 36 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. 
date <> int at the first column of the second table; + + +-- !query 37 +SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as tinyint) FROM t +-- !query 37 schema +struct +-- !query 37 output +1 +2 + + +-- !query 38 +SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as smallint) FROM t +-- !query 38 schema +struct +-- !query 38 output +1 +2 + + +-- !query 39 +SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as int) FROM t +-- !query 39 schema +struct +-- !query 39 output +1 +2 + + +-- !query 40 +SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as bigint) FROM t +-- !query 40 schema +struct +-- !query 40 output +1 +2 + + +-- !query 41 +SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as float) FROM t +-- !query 41 schema +struct +-- !query 41 output +1.0 +2.0 + + +-- !query 42 +SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as double) FROM t +-- !query 42 schema +struct +-- !query 42 output +1.0 +2.0 + + +-- !query 43 +SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t +-- !query 43 schema +struct +-- !query 43 output +1 +2 + + +-- !query 44 +SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as string) FROM t +-- !query 44 schema +struct +-- !query 44 output +1 +2 + + +-- !query 45 +SELECT cast(1 as bigint) FROM t UNION SELECT cast('2' as binary) FROM t +-- !query 45 schema +struct<> +-- !query 45 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. binary <> bigint at the first column of the second table; + + +-- !query 46 +SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as boolean) FROM t +-- !query 46 schema +struct<> +-- !query 46 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. boolean <> bigint at the first column of the second table; + + +-- !query 47 +SELECT cast(1 as bigint) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 47 schema +struct<> +-- !query 47 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. timestamp <> bigint at the first column of the second table; + + +-- !query 48 +SELECT cast(1 as bigint) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t +-- !query 48 schema +struct<> +-- !query 48 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. 
date <> bigint at the first column of the second table; + + +-- !query 49 +SELECT cast(1 as float) FROM t UNION SELECT cast(2 as tinyint) FROM t +-- !query 49 schema +struct +-- !query 49 output +1.0 +2.0 + + +-- !query 50 +SELECT cast(1 as float) FROM t UNION SELECT cast(2 as smallint) FROM t +-- !query 50 schema +struct +-- !query 50 output +1.0 +2.0 + + +-- !query 51 +SELECT cast(1 as float) FROM t UNION SELECT cast(2 as int) FROM t +-- !query 51 schema +struct +-- !query 51 output +1.0 +2.0 + + +-- !query 52 +SELECT cast(1 as float) FROM t UNION SELECT cast(2 as bigint) FROM t +-- !query 52 schema +struct +-- !query 52 output +1.0 +2.0 + + +-- !query 53 +SELECT cast(1 as float) FROM t UNION SELECT cast(2 as float) FROM t +-- !query 53 schema +struct +-- !query 53 output +1.0 +2.0 + + +-- !query 54 +SELECT cast(1 as float) FROM t UNION SELECT cast(2 as double) FROM t +-- !query 54 schema +struct +-- !query 54 output +1.0 +2.0 + + +-- !query 55 +SELECT cast(1 as float) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t +-- !query 55 schema +struct +-- !query 55 output +1.0 +2.0 + + +-- !query 56 +SELECT cast(1 as float) FROM t UNION SELECT cast(2 as string) FROM t +-- !query 56 schema +struct +-- !query 56 output +1.0 +2 + + +-- !query 57 +SELECT cast(1 as float) FROM t UNION SELECT cast('2' as binary) FROM t +-- !query 57 schema +struct<> +-- !query 57 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. binary <> float at the first column of the second table; + + +-- !query 58 +SELECT cast(1 as float) FROM t UNION SELECT cast(2 as boolean) FROM t +-- !query 58 schema +struct<> +-- !query 58 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. boolean <> float at the first column of the second table; + + +-- !query 59 +SELECT cast(1 as float) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 59 schema +struct<> +-- !query 59 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. timestamp <> float at the first column of the second table; + + +-- !query 60 +SELECT cast(1 as float) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t +-- !query 60 schema +struct<> +-- !query 60 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. 
date <> float at the first column of the second table; + + +-- !query 61 +SELECT cast(1 as double) FROM t UNION SELECT cast(2 as tinyint) FROM t +-- !query 61 schema +struct +-- !query 61 output +1.0 +2.0 + + +-- !query 62 +SELECT cast(1 as double) FROM t UNION SELECT cast(2 as smallint) FROM t +-- !query 62 schema +struct +-- !query 62 output +1.0 +2.0 + + +-- !query 63 +SELECT cast(1 as double) FROM t UNION SELECT cast(2 as int) FROM t +-- !query 63 schema +struct +-- !query 63 output +1.0 +2.0 + + +-- !query 64 +SELECT cast(1 as double) FROM t UNION SELECT cast(2 as bigint) FROM t +-- !query 64 schema +struct +-- !query 64 output +1.0 +2.0 + + +-- !query 65 +SELECT cast(1 as double) FROM t UNION SELECT cast(2 as float) FROM t +-- !query 65 schema +struct +-- !query 65 output +1.0 +2.0 + + +-- !query 66 +SELECT cast(1 as double) FROM t UNION SELECT cast(2 as double) FROM t +-- !query 66 schema +struct +-- !query 66 output +1.0 +2.0 + + +-- !query 67 +SELECT cast(1 as double) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t +-- !query 67 schema +struct +-- !query 67 output +1.0 +2.0 + + +-- !query 68 +SELECT cast(1 as double) FROM t UNION SELECT cast(2 as string) FROM t +-- !query 68 schema +struct +-- !query 68 output +1.0 +2 + + +-- !query 69 +SELECT cast(1 as double) FROM t UNION SELECT cast('2' as binary) FROM t +-- !query 69 schema +struct<> +-- !query 69 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. binary <> double at the first column of the second table; + + +-- !query 70 +SELECT cast(1 as double) FROM t UNION SELECT cast(2 as boolean) FROM t +-- !query 70 schema +struct<> +-- !query 70 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. boolean <> double at the first column of the second table; + + +-- !query 71 +SELECT cast(1 as double) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 71 schema +struct<> +-- !query 71 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. timestamp <> double at the first column of the second table; + + +-- !query 72 +SELECT cast(1 as double) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t +-- !query 72 schema +struct<> +-- !query 72 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. 
date <> double at the first column of the second table; + + +-- !query 73 +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as tinyint) FROM t +-- !query 73 schema +struct +-- !query 73 output +1 +2 + + +-- !query 74 +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as smallint) FROM t +-- !query 74 schema +struct +-- !query 74 output +1 +2 + + +-- !query 75 +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as int) FROM t +-- !query 75 schema +struct +-- !query 75 output +1 +2 + + +-- !query 76 +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as bigint) FROM t +-- !query 76 schema +struct +-- !query 76 output +1 +2 + + +-- !query 77 +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as float) FROM t +-- !query 77 schema +struct +-- !query 77 output +1.0 +2.0 + + +-- !query 78 +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as double) FROM t +-- !query 78 schema +struct +-- !query 78 output +1.0 +2.0 + + +-- !query 79 +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t +-- !query 79 schema +struct +-- !query 79 output +1 +2 + + +-- !query 80 +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as string) FROM t +-- !query 80 schema +struct +-- !query 80 output +1 +2 + + +-- !query 81 +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast('2' as binary) FROM t +-- !query 81 schema +struct<> +-- !query 81 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. binary <> decimal(10,0) at the first column of the second table; + + +-- !query 82 +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as boolean) FROM t +-- !query 82 schema +struct<> +-- !query 82 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. boolean <> decimal(10,0) at the first column of the second table; + + +-- !query 83 +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 83 schema +struct<> +-- !query 83 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. timestamp <> decimal(10,0) at the first column of the second table; + + +-- !query 84 +SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t +-- !query 84 schema +struct<> +-- !query 84 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. 
date <> decimal(10,0) at the first column of the second table; + + +-- !query 85 +SELECT cast(1 as string) FROM t UNION SELECT cast(2 as tinyint) FROM t +-- !query 85 schema +struct +-- !query 85 output +1 +2 + + +-- !query 86 +SELECT cast(1 as string) FROM t UNION SELECT cast(2 as smallint) FROM t +-- !query 86 schema +struct +-- !query 86 output +1 +2 + + +-- !query 87 +SELECT cast(1 as string) FROM t UNION SELECT cast(2 as int) FROM t +-- !query 87 schema +struct +-- !query 87 output +1 +2 + + +-- !query 88 +SELECT cast(1 as string) FROM t UNION SELECT cast(2 as bigint) FROM t +-- !query 88 schema +struct +-- !query 88 output +1 +2 + + +-- !query 89 +SELECT cast(1 as string) FROM t UNION SELECT cast(2 as float) FROM t +-- !query 89 schema +struct +-- !query 89 output +1 +2.0 + + +-- !query 90 +SELECT cast(1 as string) FROM t UNION SELECT cast(2 as double) FROM t +-- !query 90 schema +struct +-- !query 90 output +1 +2.0 + + +-- !query 91 +SELECT cast(1 as string) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t +-- !query 91 schema +struct +-- !query 91 output +1 +2 + + +-- !query 92 +SELECT cast(1 as string) FROM t UNION SELECT cast(2 as string) FROM t +-- !query 92 schema +struct +-- !query 92 output +1 +2 + + +-- !query 93 +SELECT cast(1 as string) FROM t UNION SELECT cast('2' as binary) FROM t +-- !query 93 schema +struct<> +-- !query 93 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. binary <> string at the first column of the second table; + + +-- !query 94 +SELECT cast(1 as string) FROM t UNION SELECT cast(2 as boolean) FROM t +-- !query 94 schema +struct<> +-- !query 94 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. boolean <> string at the first column of the second table; + + +-- !query 95 +SELECT cast(1 as string) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 95 schema +struct +-- !query 95 output +1 +2017-12-11 09:30:00 + + +-- !query 96 +SELECT cast(1 as string) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t +-- !query 96 schema +struct +-- !query 96 output +1 +2017-12-11 + + +-- !query 97 +SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as tinyint) FROM t +-- !query 97 schema +struct<> +-- !query 97 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. tinyint <> binary at the first column of the second table; + + +-- !query 98 +SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as smallint) FROM t +-- !query 98 schema +struct<> +-- !query 98 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. smallint <> binary at the first column of the second table; + + +-- !query 99 +SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as int) FROM t +-- !query 99 schema +struct<> +-- !query 99 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. int <> binary at the first column of the second table; + + +-- !query 100 +SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as bigint) FROM t +-- !query 100 schema +struct<> +-- !query 100 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. 
bigint <> binary at the first column of the second table; + + +-- !query 101 +SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as float) FROM t +-- !query 101 schema +struct<> +-- !query 101 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. float <> binary at the first column of the second table; + + +-- !query 102 +SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as double) FROM t +-- !query 102 schema +struct<> +-- !query 102 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. double <> binary at the first column of the second table; + + +-- !query 103 +SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t +-- !query 103 schema +struct<> +-- !query 103 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. decimal(10,0) <> binary at the first column of the second table; + + +-- !query 104 +SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as string) FROM t +-- !query 104 schema +struct<> +-- !query 104 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. string <> binary at the first column of the second table; + + +-- !query 105 +SELECT cast('1' as binary) FROM t UNION SELECT cast('2' as binary) FROM t +-- !query 105 schema +struct +-- !query 105 output +1 +2 + + +-- !query 106 +SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as boolean) FROM t +-- !query 106 schema +struct<> +-- !query 106 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. boolean <> binary at the first column of the second table; + + +-- !query 107 +SELECT cast('1' as binary) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 107 schema +struct<> +-- !query 107 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. timestamp <> binary at the first column of the second table; + + +-- !query 108 +SELECT cast('1' as binary) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t +-- !query 108 schema +struct<> +-- !query 108 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. date <> binary at the first column of the second table; + + +-- !query 109 +SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as tinyint) FROM t +-- !query 109 schema +struct<> +-- !query 109 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. tinyint <> boolean at the first column of the second table; + + +-- !query 110 +SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as smallint) FROM t +-- !query 110 schema +struct<> +-- !query 110 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. smallint <> boolean at the first column of the second table; + + +-- !query 111 +SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as int) FROM t +-- !query 111 schema +struct<> +-- !query 111 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. 
int <> boolean at the first column of the second table; + + +-- !query 112 +SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as bigint) FROM t +-- !query 112 schema +struct<> +-- !query 112 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. bigint <> boolean at the first column of the second table; + + +-- !query 113 +SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as float) FROM t +-- !query 113 schema +struct<> +-- !query 113 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. float <> boolean at the first column of the second table; + + +-- !query 114 +SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as double) FROM t +-- !query 114 schema +struct<> +-- !query 114 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. double <> boolean at the first column of the second table; + + +-- !query 115 +SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t +-- !query 115 schema +struct<> +-- !query 115 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. decimal(10,0) <> boolean at the first column of the second table; + + +-- !query 116 +SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as string) FROM t +-- !query 116 schema +struct<> +-- !query 116 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. string <> boolean at the first column of the second table; + + +-- !query 117 +SELECT cast(1 as boolean) FROM t UNION SELECT cast('2' as binary) FROM t +-- !query 117 schema +struct<> +-- !query 117 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. binary <> boolean at the first column of the second table; + + +-- !query 118 +SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as boolean) FROM t +-- !query 118 schema +struct +-- !query 118 output +true + + +-- !query 119 +SELECT cast(1 as boolean) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 119 schema +struct<> +-- !query 119 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. timestamp <> boolean at the first column of the second table; + + +-- !query 120 +SELECT cast(1 as boolean) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t +-- !query 120 schema +struct<> +-- !query 120 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. date <> boolean at the first column of the second table; + + +-- !query 121 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as tinyint) FROM t +-- !query 121 schema +struct<> +-- !query 121 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. tinyint <> timestamp at the first column of the second table; + + +-- !query 122 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as smallint) FROM t +-- !query 122 schema +struct<> +-- !query 122 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. 
smallint <> timestamp at the first column of the second table; + + +-- !query 123 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as int) FROM t +-- !query 123 schema +struct<> +-- !query 123 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. int <> timestamp at the first column of the second table; + + +-- !query 124 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as bigint) FROM t +-- !query 124 schema +struct<> +-- !query 124 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. bigint <> timestamp at the first column of the second table; + + +-- !query 125 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as float) FROM t +-- !query 125 schema +struct<> +-- !query 125 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. float <> timestamp at the first column of the second table; + + +-- !query 126 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as double) FROM t +-- !query 126 schema +struct<> +-- !query 126 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. double <> timestamp at the first column of the second table; + + +-- !query 127 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t +-- !query 127 schema +struct<> +-- !query 127 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. decimal(10,0) <> timestamp at the first column of the second table; + + +-- !query 128 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as string) FROM t +-- !query 128 schema +struct +-- !query 128 output +2 +2017-12-12 09:30:00 + + +-- !query 129 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast('2' as binary) FROM t +-- !query 129 schema +struct<> +-- !query 129 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. binary <> timestamp at the first column of the second table; + + +-- !query 130 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as boolean) FROM t +-- !query 130 schema +struct<> +-- !query 130 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. boolean <> timestamp at the first column of the second table; + + +-- !query 131 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 131 schema +struct +-- !query 131 output +2017-12-11 09:30:00 +2017-12-12 09:30:00 + + +-- !query 132 +SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t +-- !query 132 schema +struct +-- !query 132 output +2017-12-11 00:00:00 +2017-12-12 09:30:00 + + +-- !query 133 +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as tinyint) FROM t +-- !query 133 schema +struct<> +-- !query 133 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. 
tinyint <> date at the first column of the second table; + + +-- !query 134 +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as smallint) FROM t +-- !query 134 schema +struct<> +-- !query 134 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. smallint <> date at the first column of the second table; + + +-- !query 135 +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as int) FROM t +-- !query 135 schema +struct<> +-- !query 135 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. int <> date at the first column of the second table; + + +-- !query 136 +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as bigint) FROM t +-- !query 136 schema +struct<> +-- !query 136 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. bigint <> date at the first column of the second table; + + +-- !query 137 +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as float) FROM t +-- !query 137 schema +struct<> +-- !query 137 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. float <> date at the first column of the second table; + + +-- !query 138 +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as double) FROM t +-- !query 138 schema +struct<> +-- !query 138 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. double <> date at the first column of the second table; + + +-- !query 139 +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t +-- !query 139 schema +struct<> +-- !query 139 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. decimal(10,0) <> date at the first column of the second table; + + +-- !query 140 +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as string) FROM t +-- !query 140 schema +struct +-- !query 140 output +2 +2017-12-12 + + +-- !query 141 +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast('2' as binary) FROM t +-- !query 141 schema +struct<> +-- !query 141 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. binary <> date at the first column of the second table; + + +-- !query 142 +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as boolean) FROM t +-- !query 142 schema +struct<> +-- !query 142 output +org.apache.spark.sql.AnalysisException +Union can only be performed on tables with the compatible column types. 
boolean <> date at the first column of the second table; + + +-- !query 143 +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t +-- !query 143 schema +struct +-- !query 143 output +2017-12-11 09:30:00 +2017-12-12 00:00:00 + + +-- !query 144 +SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t +-- !query 144 schema +struct +-- !query 144 output +2017-12-11 +2017-12-12 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/windowFrameCoercion.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/windowFrameCoercion.sql.out new file mode 100644 index 0000000000000..01d83938031fe --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/windowFrameCoercion.sql.out @@ -0,0 +1,206 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 25 + + +-- !query 0 +CREATE TEMPORARY VIEW t AS SELECT 1 +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as tinyint)) FROM t +-- !query 1 schema +struct +-- !query 1 output +1 + + +-- !query 2 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as smallint)) FROM t +-- !query 2 schema +struct +-- !query 2 output +1 + + +-- !query 3 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as int)) FROM t +-- !query 3 schema +struct +-- !query 3 output +1 + + +-- !query 4 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as bigint)) FROM t +-- !query 4 schema +struct +-- !query 4 output +1 + + +-- !query 5 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as float)) FROM t +-- !query 5 schema +struct +-- !query 5 output +1 + + +-- !query 6 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as double)) FROM t +-- !query 6 schema +struct +-- !query 6 output +1 + + +-- !query 7 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as decimal(10, 0))) FROM t +-- !query 7 schema +struct +-- !query 7 output +1 + + +-- !query 8 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as string)) FROM t +-- !query 8 schema +struct +-- !query 8 output +1 + + +-- !query 9 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('1' as binary)) FROM t +-- !query 9 schema +struct +-- !query 9 output +1 + + +-- !query 10 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as boolean)) FROM t +-- !query 10 schema +struct +-- !query 10 output +1 + + +-- !query 11 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 11 schema +struct +-- !query 11 output +1 + + +-- !query 12 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 12 schema +struct +-- !query 12 output +1 + + +-- !query 13 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as tinyint) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t +-- !query 13 schema +struct +-- !query 13 output +1 + + +-- !query 14 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as smallint) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t +-- !query 14 schema +struct +-- !query 14 output +1 + + +-- !query 15 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as int) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t +-- !query 15 schema +struct +-- !query 15 output +1 + + +-- !query 16 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as bigint) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t +-- !query 16 schema +struct 
+-- !query 16 output +1 + + +-- !query 17 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as float) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t +-- !query 17 schema +struct +-- !query 17 output +1 + + +-- !query 18 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as double) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t +-- !query 18 schema +struct +-- !query 18 output +1 + + +-- !query 19 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as decimal(10, 0)) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t +-- !query 19 schema +struct +-- !query 19 output +1 + + +-- !query 20 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as string) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t +-- !query 20 schema +struct<> +-- !query 20 output +org.apache.spark.sql.AnalysisException +cannot resolve 'RANGE BETWEEN CURRENT ROW AND CAST(1 AS STRING) FOLLOWING' due to data type mismatch: The data type of the upper bound 'string' does not match the expected data type '(numeric or calendarinterval)'.; line 1 pos 21 + + +-- !query 21 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('1' as binary) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t +-- !query 21 schema +struct<> +-- !query 21 output +org.apache.spark.sql.AnalysisException +cannot resolve '(PARTITION BY 1 ORDER BY CAST('1' AS BINARY) DESC NULLS LAST RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING)' due to data type mismatch: The data type 'binary' used in the order specification does not match the data type 'int' which is used in the range frame.; line 1 pos 21 + + +-- !query 22 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as boolean) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t +-- !query 22 schema +struct<> +-- !query 22 output +org.apache.spark.sql.AnalysisException +cannot resolve 'RANGE BETWEEN CURRENT ROW AND CAST(1 AS BOOLEAN) FOLLOWING' due to data type mismatch: The data type of the upper bound 'boolean' does not match the expected data type '(numeric or calendarinterval)'.; line 1 pos 21 + + +-- !query 23 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('2017-12-11 09:30:00.0' as timestamp) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t +-- !query 23 schema +struct<> +-- !query 23 output +org.apache.spark.sql.AnalysisException +cannot resolve '(PARTITION BY 1 ORDER BY CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) DESC NULLS LAST RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING)' due to data type mismatch: The data type 'timestamp' used in the order specification does not match the data type 'int' which is used in the range frame.; line 1 pos 21 + + +-- !query 24 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('2017-12-11 09:30:00' as date) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t +-- !query 24 schema +struct +-- !query 24 output +1 diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index 73ad27e5bf8ce..133458ae9303b 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -61,7 +61,7 @@ ROWS BETWEEN CURRENT ROW AND 2147483648 FOLLOWING) FROM testData ORDER BY cate, struct<> -- !query 3 output org.apache.spark.sql.AnalysisException -cannot resolve 'ROWS BETWEEN CURRENT ROW AND 2147483648L FOLLOWING' due to data type mismatch: The data type of the upper bound 'LongType does not match the expected data type 'IntegerType'.; line 1 pos 41 +cannot resolve 'ROWS BETWEEN CURRENT ROW 
AND 2147483648L FOLLOWING' due to data type mismatch: The data type of the upper bound 'bigint' does not match the expected data type 'int'.; line 1 pos 41 -- !query 4 @@ -221,7 +221,7 @@ RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val struct<> -- !query 14 output org.apache.spark.sql.AnalysisException -cannot resolve '(PARTITION BY testdata.`cate` ORDER BY current_timestamp() ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING)' due to data type mismatch: The data type 'TimestampType' used in the order specification does not match the data type 'IntegerType' which is used in the range frame.; line 1 pos 33 +cannot resolve '(PARTITION BY testdata.`cate` ORDER BY current_timestamp() ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING)' due to data type mismatch: The data type 'timestamp' used in the order specification does not match the data type 'int' which is used in the range frame.; line 1 pos 33 -- !query 15 diff --git a/sql/core/src/test/resources/ssb/1.1.sql b/sql/core/src/test/resources/ssb/1.1.sql new file mode 100644 index 0000000000000..62da3029469e4 --- /dev/null +++ b/sql/core/src/test/resources/ssb/1.1.sql @@ -0,0 +1,6 @@ +select sum(lo_extendedprice*lo_discount) as revenue + from lineorder, date + where lo_orderdate = d_datekey + and d_year = 1993 + and lo_discount between 1 and 3 + and lo_quantity < 25 diff --git a/sql/core/src/test/resources/ssb/1.2.sql b/sql/core/src/test/resources/ssb/1.2.sql new file mode 100644 index 0000000000000..1657bfd4b2a32 --- /dev/null +++ b/sql/core/src/test/resources/ssb/1.2.sql @@ -0,0 +1,6 @@ +select sum(lo_extendedprice*lo_discount) as revenue + from lineorder, date + where lo_orderdate = d_datekey + and d_yearmonthnum = 199401 + and lo_discount between 4 and 6 + and lo_quantity between 26 and 35 diff --git a/sql/core/src/test/resources/ssb/1.3.sql b/sql/core/src/test/resources/ssb/1.3.sql new file mode 100644 index 0000000000000..e9bbf51f051dd --- /dev/null +++ b/sql/core/src/test/resources/ssb/1.3.sql @@ -0,0 +1,6 @@ +select sum(lo_extendedprice*lo_discount) as revenue + from lineorder, date + where lo_orderdate = d_datekey + and d_weeknuminyear = 6 and d_year = 1994 + and lo_discount between 5 and 7 + and lo_quantity between 36 and 40 diff --git a/sql/core/src/test/resources/ssb/2.1.sql b/sql/core/src/test/resources/ssb/2.1.sql new file mode 100644 index 0000000000000..00d402785320b --- /dev/null +++ b/sql/core/src/test/resources/ssb/2.1.sql @@ -0,0 +1,9 @@ +select sum(lo_revenue), d_year, p_brand1 + from lineorder, date, part, supplier + where lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and p_category = 'MFGR#12' + and s_region = 'AMERICA' + group by d_year, p_brand1 + order by d_year, p_brand1 diff --git a/sql/core/src/test/resources/ssb/2.2.sql b/sql/core/src/test/resources/ssb/2.2.sql new file mode 100644 index 0000000000000..c53a925f75ddb --- /dev/null +++ b/sql/core/src/test/resources/ssb/2.2.sql @@ -0,0 +1,9 @@ +select sum(lo_revenue), d_year, p_brand1 + from lineorder, date, part, supplier + where lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and p_brand1 between 'MFGR#2221' and 'MFGR#2228' + and s_region = 'ASIA' + group by d_year, p_brand1 + order by d_year, p_brand1 diff --git a/sql/core/src/test/resources/ssb/2.3.sql b/sql/core/src/test/resources/ssb/2.3.sql new file mode 100644 index 0000000000000..e36530c65e428 --- /dev/null +++ b/sql/core/src/test/resources/ssb/2.3.sql @@ -0,0 +1,9 @@ +select 
sum(lo_revenue), d_year, p_brand1 + from lineorder, date, part, supplier + where lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and p_brand1 = 'MFGR#2221' + and s_region = 'EUROPE' + group by d_year, p_brand1 + order by d_year, p_brand1 diff --git a/sql/core/src/test/resources/ssb/3.1.sql b/sql/core/src/test/resources/ssb/3.1.sql new file mode 100644 index 0000000000000..663ec3fb3d5fa --- /dev/null +++ b/sql/core/src/test/resources/ssb/3.1.sql @@ -0,0 +1,10 @@ +select c_nation, s_nation, d_year, sum(lo_revenue) as revenue + from customer, lineorder, supplier, date + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_orderdate = d_datekey + and c_region = 'ASIA' + and s_region = 'ASIA' + and d_year >= 1992 and d_year <= 1997 + group by c_nation, s_nation, d_year + order by d_year asc, revenue desc diff --git a/sql/core/src/test/resources/ssb/3.2.sql b/sql/core/src/test/resources/ssb/3.2.sql new file mode 100644 index 0000000000000..e1eaf10cded62 --- /dev/null +++ b/sql/core/src/test/resources/ssb/3.2.sql @@ -0,0 +1,10 @@ +select c_city, s_city, d_year, sum(lo_revenue) as revenue + from customer, lineorder, supplier, date + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_orderdate = d_datekey + and c_nation = 'UNITED STATES' + and s_nation = 'UNITED STATES' + and d_year >= 1992 and d_year <= 1997 + group by c_city, s_city, d_year + order by d_year asc, revenue desc diff --git a/sql/core/src/test/resources/ssb/3.3.sql b/sql/core/src/test/resources/ssb/3.3.sql new file mode 100644 index 0000000000000..f2cb44f15a761 --- /dev/null +++ b/sql/core/src/test/resources/ssb/3.3.sql @@ -0,0 +1,12 @@ +select c_city, s_city, d_year, sum(lo_revenue) as revenue + from customer, lineorder, supplier, date + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_orderdate = d_datekey + and c_nation = 'UNITED KINGDOM' + and (c_city='UNITED KI1' or c_city='UNITED KI5') + and (s_city='UNITED KI1' or s_city='UNITED KI5') + and s_nation = 'UNITED KINGDOM' + and d_year >= 1992 and d_year <= 1997 + group by c_city, s_city, d_year + order by d_year asc, revenue desc diff --git a/sql/core/src/test/resources/ssb/3.4.sql b/sql/core/src/test/resources/ssb/3.4.sql new file mode 100644 index 0000000000000..936f2464a55f1 --- /dev/null +++ b/sql/core/src/test/resources/ssb/3.4.sql @@ -0,0 +1,12 @@ +select c_city, s_city, d_year, sum(lo_revenue) as revenue + from customer, lineorder, supplier, date + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_orderdate = d_datekey + and c_nation = 'UNITED KINGDOM' + and (c_city='UNITED KI1' or c_city='UNITED KI5') + and (s_city='UNITED KI1' or s_city='UNITED KI5') + and s_nation = 'UNITED KINGDOM' + and d_yearmonth = 'Dec1997' + group by c_city, s_city, d_year + order by d_year asc, revenue desc diff --git a/sql/core/src/test/resources/ssb/4.1.sql b/sql/core/src/test/resources/ssb/4.1.sql new file mode 100644 index 0000000000000..af19ecdf58c1f --- /dev/null +++ b/sql/core/src/test/resources/ssb/4.1.sql @@ -0,0 +1,11 @@ +select d_year, c_nation, sum(lo_revenue-lo_supplycost) as profit1 + from date, customer, supplier, part, lineorder + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_partkey = p_partkey + and lo_orderdate = d_datekey + and c_region = 'AMERICA' + and s_region = 'AMERICA' + and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2') + group by d_year, c_nation + order by d_year, c_nation diff --git a/sql/core/src/test/resources/ssb/4.2.sql 
b/sql/core/src/test/resources/ssb/4.2.sql new file mode 100644 index 0000000000000..2ffe6e2897522 --- /dev/null +++ b/sql/core/src/test/resources/ssb/4.2.sql @@ -0,0 +1,12 @@ +select d_year, s_nation, p_category, sum(lo_revenue-lo_supplycost) as profit1 + from date, customer, supplier, part, lineorder + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_partkey = p_partkey + and lo_orderdate = d_datekey + and c_region = 'AMERICA' + and s_region = 'AMERICA' + and (d_year = 1997 or d_year = 1998) + and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2') + group by d_year, s_nation, p_category + order by d_year, s_nation, p_category diff --git a/sql/core/src/test/resources/ssb/4.3.sql b/sql/core/src/test/resources/ssb/4.3.sql new file mode 100644 index 0000000000000..e24e2cc78c914 --- /dev/null +++ b/sql/core/src/test/resources/ssb/4.3.sql @@ -0,0 +1,12 @@ +select d_year, s_city, p_brand1, sum(lo_revenue-lo_supplycost) as profit1 + from date, customer, supplier, part, lineorder + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_partkey = p_partkey + and lo_orderdate = d_datekey + and c_region = 'AMERICA' + and s_nation = 'UNITED STATES' + and (d_year = 1997 or d_year = 1998) + and p_category = 'MFGR#14' + group by d_year, s_city, p_brand1 + order by d_year, s_city, p_brand1 diff --git a/sql/core/src/test/resources/test-data/comments.csv b/sql/core/src/test/resources/test-data/comments.csv index 6275be7285b36..c0ace46db8c00 100644 --- a/sql/core/src/test/resources/test-data/comments.csv +++ b/sql/core/src/test/resources/test-data/comments.csv @@ -4,3 +4,4 @@ 6,7,8,9,0,2015-08-21 16:58:01 ~0,9,8,7,6,2015-08-22 17:59:02 1,2,3,4,5,2015-08-23 18:00:42 +~ comment in last line to test SPARK-22516 - do not add empty line at the end of this file! 
\ No newline at end of file diff --git a/sql/core/src/test/resources/test-data/impala_timestamp.parq b/sql/core/src/test/resources/test-data/impala_timestamp.parq new file mode 100644 index 0000000000000..21e5318db98c9 Binary files /dev/null and b/sql/core/src/test/resources/test-data/impala_timestamp.parq differ diff --git a/sql/core/src/test/resources/tpch/q1.sql b/sql/core/src/test/resources/tpch/q1.sql new file mode 100644 index 0000000000000..73eb8d8417155 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q1.sql @@ -0,0 +1,23 @@ +-- using default substitutions + +select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +from + lineitem +where + l_shipdate <= date '1998-12-01' - interval '90' day +group by + l_returnflag, + l_linestatus +order by + l_returnflag, + l_linestatus diff --git a/sql/core/src/test/resources/tpch/q10.sql b/sql/core/src/test/resources/tpch/q10.sql new file mode 100644 index 0000000000000..3b2ae588dee63 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q10.sql @@ -0,0 +1,34 @@ +-- using default substitutions + +select + c_custkey, + c_name, + sum(l_extendedprice * (1 - l_discount)) as revenue, + c_acctbal, + n_name, + c_address, + c_phone, + c_comment +from + customer, + orders, + lineitem, + nation +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate >= date '1993-10-01' + and o_orderdate < date '1993-10-01' + interval '3' month + and l_returnflag = 'R' + and c_nationkey = n_nationkey +group by + c_custkey, + c_name, + c_acctbal, + c_phone, + n_name, + c_address, + c_comment +order by + revenue desc +limit 20 diff --git a/sql/core/src/test/resources/tpch/q11.sql b/sql/core/src/test/resources/tpch/q11.sql new file mode 100644 index 0000000000000..531e78c21b047 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q11.sql @@ -0,0 +1,29 @@ +-- using default substitutions + +select + ps_partkey, + sum(ps_supplycost * ps_availqty) as value +from + partsupp, + supplier, + nation +where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'GERMANY' +group by + ps_partkey having + sum(ps_supplycost * ps_availqty) > ( + select + sum(ps_supplycost * ps_availqty) * 0.0001000000 + from + partsupp, + supplier, + nation + where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'GERMANY' + ) +order by + value desc diff --git a/sql/core/src/test/resources/tpch/q12.sql b/sql/core/src/test/resources/tpch/q12.sql new file mode 100644 index 0000000000000..d3e70eb481a9d --- /dev/null +++ b/sql/core/src/test/resources/tpch/q12.sql @@ -0,0 +1,30 @@ +-- using default substitutions + +select + l_shipmode, + sum(case + when o_orderpriority = '1-URGENT' + or o_orderpriority = '2-HIGH' + then 1 + else 0 + end) as high_line_count, + sum(case + when o_orderpriority <> '1-URGENT' + and o_orderpriority <> '2-HIGH' + then 1 + else 0 + end) as low_line_count +from + orders, + lineitem +where + o_orderkey = l_orderkey + and l_shipmode in ('MAIL', 'SHIP') + and l_commitdate < l_receiptdate + and l_shipdate < l_commitdate + and l_receiptdate >= date '1994-01-01' + and l_receiptdate < date '1994-01-01' + interval '1' year +group by + l_shipmode +order by + l_shipmode diff --git a/sql/core/src/test/resources/tpch/q13.sql 
b/sql/core/src/test/resources/tpch/q13.sql new file mode 100644 index 0000000000000..3375002c5fd44 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q13.sql @@ -0,0 +1,22 @@ +-- using default substitutions + +select + c_count, + count(*) as custdist +from + ( + select + c_custkey, + count(o_orderkey) as c_count + from + customer left outer join orders on + c_custkey = o_custkey + and o_comment not like '%special%requests%' + group by + c_custkey + ) as c_orders +group by + c_count +order by + custdist desc, + c_count desc diff --git a/sql/core/src/test/resources/tpch/q14.sql b/sql/core/src/test/resources/tpch/q14.sql new file mode 100644 index 0000000000000..753ea568914cb --- /dev/null +++ b/sql/core/src/test/resources/tpch/q14.sql @@ -0,0 +1,15 @@ +-- using default substitutions + +select + 100.00 * sum(case + when p_type like 'PROMO%' + then l_extendedprice * (1 - l_discount) + else 0 + end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue +from + lineitem, + part +where + l_partkey = p_partkey + and l_shipdate >= date '1995-09-01' + and l_shipdate < date '1995-09-01' + interval '1' month diff --git a/sql/core/src/test/resources/tpch/q15.sql b/sql/core/src/test/resources/tpch/q15.sql new file mode 100644 index 0000000000000..64d0b48ec09ae --- /dev/null +++ b/sql/core/src/test/resources/tpch/q15.sql @@ -0,0 +1,35 @@ +-- using default substitutions + +with revenue0 as + (select + l_suppkey as supplier_no, + sum(l_extendedprice * (1 - l_discount)) as total_revenue + from + lineitem + where + l_shipdate >= date '1996-01-01' + and l_shipdate < date '1996-01-01' + interval '3' month + group by + l_suppkey) + + +select + s_suppkey, + s_name, + s_address, + s_phone, + total_revenue +from + supplier, + revenue0 +where + s_suppkey = supplier_no + and total_revenue = ( + select + max(total_revenue) + from + revenue0 + ) +order by + s_suppkey + diff --git a/sql/core/src/test/resources/tpch/q16.sql b/sql/core/src/test/resources/tpch/q16.sql new file mode 100644 index 0000000000000..a6ac68898ec8d --- /dev/null +++ b/sql/core/src/test/resources/tpch/q16.sql @@ -0,0 +1,32 @@ +-- using default substitutions + +select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt +from + partsupp, + part +where + p_partkey = ps_partkey + and p_brand <> 'Brand#45' + and p_type not like 'MEDIUM POLISHED%' + and p_size in (49, 14, 23, 45, 19, 3, 36, 9) + and ps_suppkey not in ( + select + s_suppkey + from + supplier + where + s_comment like '%Customer%Complaints%' + ) +group by + p_brand, + p_type, + p_size +order by + supplier_cnt desc, + p_brand, + p_type, + p_size diff --git a/sql/core/src/test/resources/tpch/q17.sql b/sql/core/src/test/resources/tpch/q17.sql new file mode 100644 index 0000000000000..74fb1f653a945 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q17.sql @@ -0,0 +1,19 @@ +-- using default substitutions + +select + sum(l_extendedprice) / 7.0 as avg_yearly +from + lineitem, + part +where + p_partkey = l_partkey + and p_brand = 'Brand#23' + and p_container = 'MED BOX' + and l_quantity < ( + select + 0.2 * avg(l_quantity) + from + lineitem + where + l_partkey = p_partkey + ) diff --git a/sql/core/src/test/resources/tpch/q18.sql b/sql/core/src/test/resources/tpch/q18.sql new file mode 100644 index 0000000000000..210fba19ec7f7 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q18.sql @@ -0,0 +1,35 @@ +-- using default substitutions + +select + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice, + sum(l_quantity) +from + customer, + orders, + 
lineitem +where + o_orderkey in ( + select + l_orderkey + from + lineitem + group by + l_orderkey having + sum(l_quantity) > 300 + ) + and c_custkey = o_custkey + and o_orderkey = l_orderkey +group by + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice +order by + o_totalprice desc, + o_orderdate +limit 100 \ No newline at end of file diff --git a/sql/core/src/test/resources/tpch/q19.sql b/sql/core/src/test/resources/tpch/q19.sql new file mode 100644 index 0000000000000..c07327da3ac29 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q19.sql @@ -0,0 +1,37 @@ +-- using default substitutions + +select + sum(l_extendedprice* (1 - l_discount)) as revenue +from + lineitem, + part +where + ( + p_partkey = l_partkey + and p_brand = 'Brand#12' + and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + and l_quantity >= 1 and l_quantity <= 1 + 10 + and p_size between 1 and 5 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#23' + and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + and l_quantity >= 10 and l_quantity <= 10 + 10 + and p_size between 1 and 10 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#34' + and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + and l_quantity >= 20 and l_quantity <= 20 + 10 + and p_size between 1 and 15 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) diff --git a/sql/core/src/test/resources/tpch/q2.sql b/sql/core/src/test/resources/tpch/q2.sql new file mode 100644 index 0000000000000..d0e3b7e13e301 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q2.sql @@ -0,0 +1,46 @@ +-- using default substitutions + +select + s_acctbal, + s_name, + n_name, + p_partkey, + p_mfgr, + s_address, + s_phone, + s_comment +from + part, + supplier, + partsupp, + nation, + region +where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and p_size = 15 + and p_type like '%BRASS' + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'EUROPE' + and ps_supplycost = ( + select + min(ps_supplycost) + from + partsupp, + supplier, + nation, + region + where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'EUROPE' + ) +order by + s_acctbal desc, + n_name, + s_name, + p_partkey +limit 100 diff --git a/sql/core/src/test/resources/tpch/q20.sql b/sql/core/src/test/resources/tpch/q20.sql new file mode 100644 index 0000000000000..e161d340b9b6b --- /dev/null +++ b/sql/core/src/test/resources/tpch/q20.sql @@ -0,0 +1,39 @@ +-- using default substitutions + +select + s_name, + s_address +from + supplier, + nation +where + s_suppkey in ( + select + ps_suppkey + from + partsupp + where + ps_partkey in ( + select + p_partkey + from + part + where + p_name like 'forest%' + ) + and ps_availqty > ( + select + 0.5 * sum(l_quantity) + from + lineitem + where + l_partkey = ps_partkey + and l_suppkey = ps_suppkey + and l_shipdate >= date '1994-01-01' + and l_shipdate < date '1994-01-01' + interval '1' year + ) + ) + and s_nationkey = n_nationkey + and n_name = 'CANADA' +order by + s_name diff --git a/sql/core/src/test/resources/tpch/q21.sql b/sql/core/src/test/resources/tpch/q21.sql new file mode 100644 index 0000000000000..fdcdfbcf7976e --- /dev/null +++ b/sql/core/src/test/resources/tpch/q21.sql @@ -0,0 +1,42 @@ +-- using 
default substitutions + +select + s_name, + count(*) as numwait +from + supplier, + lineitem l1, + orders, + nation +where + s_suppkey = l1.l_suppkey + and o_orderkey = l1.l_orderkey + and o_orderstatus = 'F' + and l1.l_receiptdate > l1.l_commitdate + and exists ( + select + * + from + lineitem l2 + where + l2.l_orderkey = l1.l_orderkey + and l2.l_suppkey <> l1.l_suppkey + ) + and not exists ( + select + * + from + lineitem l3 + where + l3.l_orderkey = l1.l_orderkey + and l3.l_suppkey <> l1.l_suppkey + and l3.l_receiptdate > l3.l_commitdate + ) + and s_nationkey = n_nationkey + and n_name = 'SAUDI ARABIA' +group by + s_name +order by + numwait desc, + s_name +limit 100 \ No newline at end of file diff --git a/sql/core/src/test/resources/tpch/q22.sql b/sql/core/src/test/resources/tpch/q22.sql new file mode 100644 index 0000000000000..1d7706e9a0bf4 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q22.sql @@ -0,0 +1,39 @@ +-- using default substitutions + +select + cntrycode, + count(*) as numcust, + sum(c_acctbal) as totacctbal +from + ( + select + substring(c_phone, 1, 2) as cntrycode, + c_acctbal + from + customer + where + substring(c_phone, 1, 2) in + ('13', '31', '23', '29', '30', '18', '17') + and c_acctbal > ( + select + avg(c_acctbal) + from + customer + where + c_acctbal > 0.00 + and substring(c_phone, 1, 2) in + ('13', '31', '23', '29', '30', '18', '17') + ) + and not exists ( + select + * + from + orders + where + o_custkey = c_custkey + ) + ) as custsale +group by + cntrycode +order by + cntrycode diff --git a/sql/core/src/test/resources/tpch/q3.sql b/sql/core/src/test/resources/tpch/q3.sql new file mode 100644 index 0000000000000..948d6bcf12f68 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q3.sql @@ -0,0 +1,25 @@ +-- using default substitutions + +select + l_orderkey, + sum(l_extendedprice * (1 - l_discount)) as revenue, + o_orderdate, + o_shippriority +from + customer, + orders, + lineitem +where + c_mktsegment = 'BUILDING' + and c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate < date '1995-03-15' + and l_shipdate > date '1995-03-15' +group by + l_orderkey, + o_orderdate, + o_shippriority +order by + revenue desc, + o_orderdate +limit 10 diff --git a/sql/core/src/test/resources/tpch/q4.sql b/sql/core/src/test/resources/tpch/q4.sql new file mode 100644 index 0000000000000..67330e36a066d --- /dev/null +++ b/sql/core/src/test/resources/tpch/q4.sql @@ -0,0 +1,23 @@ +-- using default substitutions + +select + o_orderpriority, + count(*) as order_count +from + orders +where + o_orderdate >= date '1993-07-01' + and o_orderdate < date '1993-07-01' + interval '3' month + and exists ( + select + * + from + lineitem + where + l_orderkey = o_orderkey + and l_commitdate < l_receiptdate + ) +group by + o_orderpriority +order by + o_orderpriority diff --git a/sql/core/src/test/resources/tpch/q5.sql b/sql/core/src/test/resources/tpch/q5.sql new file mode 100644 index 0000000000000..b973e9f0a0956 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q5.sql @@ -0,0 +1,26 @@ +-- using default substitutions + +select + n_name, + sum(l_extendedprice * (1 - l_discount)) as revenue +from + customer, + orders, + lineitem, + supplier, + nation, + region +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and l_suppkey = s_suppkey + and c_nationkey = s_nationkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'ASIA' + and o_orderdate >= date '1994-01-01' + and o_orderdate < date '1994-01-01' + interval '1' year +group by + n_name 
+order by + revenue desc diff --git a/sql/core/src/test/resources/tpch/q6.sql b/sql/core/src/test/resources/tpch/q6.sql new file mode 100644 index 0000000000000..22294579ee043 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q6.sql @@ -0,0 +1,11 @@ +-- using default substitutions + +select + sum(l_extendedprice * l_discount) as revenue +from + lineitem +where + l_shipdate >= date '1994-01-01' + and l_shipdate < date '1994-01-01' + interval '1' year + and l_discount between .06 - 0.01 and .06 + 0.01 + and l_quantity < 24 diff --git a/sql/core/src/test/resources/tpch/q7.sql b/sql/core/src/test/resources/tpch/q7.sql new file mode 100644 index 0000000000000..21105c0519e1b --- /dev/null +++ b/sql/core/src/test/resources/tpch/q7.sql @@ -0,0 +1,41 @@ +-- using default substitutions + +select + supp_nation, + cust_nation, + l_year, + sum(volume) as revenue +from + ( + select + n1.n_name as supp_nation, + n2.n_name as cust_nation, + year(l_shipdate) as l_year, + l_extendedprice * (1 - l_discount) as volume + from + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2 + where + s_suppkey = l_suppkey + and o_orderkey = l_orderkey + and c_custkey = o_custkey + and s_nationkey = n1.n_nationkey + and c_nationkey = n2.n_nationkey + and ( + (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY') + or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE') + ) + and l_shipdate between date '1995-01-01' and date '1996-12-31' + ) as shipping +group by + supp_nation, + cust_nation, + l_year +order by + supp_nation, + cust_nation, + l_year diff --git a/sql/core/src/test/resources/tpch/q8.sql b/sql/core/src/test/resources/tpch/q8.sql new file mode 100644 index 0000000000000..81d81871c4920 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q8.sql @@ -0,0 +1,39 @@ +-- using default substitutions + +select + o_year, + sum(case + when nation = 'BRAZIL' then volume + else 0 + end) / sum(volume) as mkt_share +from + ( + select + year(o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) as volume, + n2.n_name as nation + from + part, + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2, + region + where + p_partkey = l_partkey + and s_suppkey = l_suppkey + and l_orderkey = o_orderkey + and o_custkey = c_custkey + and c_nationkey = n1.n_nationkey + and n1.n_regionkey = r_regionkey + and r_name = 'AMERICA' + and s_nationkey = n2.n_nationkey + and o_orderdate between date '1995-01-01' and date '1996-12-31' + and p_type = 'ECONOMY ANODIZED STEEL' + ) as all_nations +group by + o_year +order by + o_year diff --git a/sql/core/src/test/resources/tpch/q9.sql b/sql/core/src/test/resources/tpch/q9.sql new file mode 100644 index 0000000000000..a4e8e8382be6a --- /dev/null +++ b/sql/core/src/test/resources/tpch/q9.sql @@ -0,0 +1,34 @@ +-- using default substitutions + +select + nation, + o_year, + sum(amount) as sum_profit +from + ( + select + n_name as nation, + year(o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount + from + part, + supplier, + lineitem, + partsupp, + orders, + nation + where + s_suppkey = l_suppkey + and ps_suppkey = l_suppkey + and ps_partkey = l_partkey + and p_partkey = l_partkey + and o_orderkey = l_orderkey + and s_nationkey = n_nationkey + and p_name like '%green%' + ) as profit +group by + nation, + o_year +order by + nation, + o_year desc diff --git a/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala new file mode 100644 index 
0000000000000..7037749f14478 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.scalatest.BeforeAndAfterAll + +import org.apache.spark.sql.catalyst.expressions.codegen.{CodeFormatter, CodeGenerator} +import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.execution.{SparkPlan, WholeStageCodegenExec} +import org.apache.spark.sql.test.SharedSQLContext +import org.apache.spark.util.Utils + +abstract class BenchmarkQueryTest extends QueryTest with SharedSQLContext with BeforeAndAfterAll { + + // When Utils.isTesting is true, the RuleExecutor will issue an exception when hitting + // the max iteration of analyzer/optimizer batches. + assert(Utils.isTesting, "spark.testing is not set to true") + + /** + * Drop all the tables + */ + protected override def afterAll(): Unit = { + try { + // For debugging dump some statistics about how much time was spent in various optimizer rules + logWarning(RuleExecutor.dumpTimeSpent()) + spark.sessionState.catalog.reset() + } finally { + super.afterAll() + } + } + + override def beforeAll() { + super.beforeAll() + RuleExecutor.resetTime() + } + + protected def checkGeneratedCode(plan: SparkPlan): Unit = { + val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegenExec]() + plan foreach { + case s: WholeStageCodegenExec => + codegenSubtrees += s + case s => s + } + codegenSubtrees.toSeq.foreach { subtree => + val code = subtree.doCodeGen()._2 + try { + // Just check the generated code can be properly compiled + CodeGenerator.compile(code) + } catch { + case e: Exception => + val msg = + s""" + |failed to compile: + |Subtree: + |$subtree + |Generated code: + |${CodeFormatter.format(code)} + """.stripMargin + throw new Exception(msg, e) + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index 06848e4d2b297..e7776e36702ad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql import scala.util.Random +import org.apache.spark.sql.catalyst.expressions.{Alias, Literal} +import org.apache.spark.sql.catalyst.expressions.aggregate.Count import org.apache.spark.sql.execution.WholeStageCodegenExec import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec} import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec @@ -27,7 +29,7 @@ import org.apache.spark.sql.functions._ import 
org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.test.SQLTestData.DecimalData -import org.apache.spark.sql.types.{Decimal, DecimalType} +import org.apache.spark.sql.types.DecimalType case class Fact(date: Int, hour: Int, minute: Int, room_name: String, temp: Double) @@ -456,7 +458,6 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext { test("null moments") { val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") - checkAnswer( emptyTableData.agg(variance('a), var_samp('a), var_pop('a), skewness('a), kurtosis('a)), Row(null, null, null, null, null)) @@ -666,4 +667,23 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext { assert(exchangePlans.length == 1) } } + + Seq(true, false).foreach { codegen => + test("SPARK-22951: dropDuplicates on empty dataFrames should produce correct aggregate " + + s"results when codegen is enabled: $codegen") { + withSQLConf((SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, codegen.toString)) { + // explicit global aggregations + val emptyAgg = Map.empty[String, String] + checkAnswer(spark.emptyDataFrame.agg(emptyAgg), Seq(Row())) + checkAnswer(spark.emptyDataFrame.groupBy().agg(emptyAgg), Seq(Row())) + checkAnswer(spark.emptyDataFrame.groupBy().agg(count("*")), Seq(Row(0))) + checkAnswer(spark.emptyDataFrame.dropDuplicates().agg(emptyAgg), Seq(Row())) + checkAnswer(spark.emptyDataFrame.dropDuplicates().groupBy().agg(emptyAgg), Seq(Row())) + checkAnswer(spark.emptyDataFrame.dropDuplicates().groupBy().agg(count("*")), Seq(Row(0))) + + // global aggregation is converted to grouping aggregation: + assert(spark.emptyDataFrame.dropDuplicates().count() == 0) + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala index 46b21c3b64a2e..5169d2b5fc6b2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala @@ -260,6 +260,14 @@ class DataFrameStatSuite extends QueryTest with SharedSQLContext { assert(res2(1).isEmpty) } + // SPARK-22957: check for 32-bit overflow when computing rank. + // ignored - takes 4 minutes to run.
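+ // Why 32-bit rank arithmetic overflows at this scale, as a plain-arithmetic sketch (nothing + // Spark-specific assumed): 3000000000L * 8 / 10 == 2400000000L, which exceeds Int.MaxValue + // (2147483647); truncated to 32 bits via .toInt it wraps to -1894967296, so a rank index + // held in an Int for a range of more than Int.MaxValue rows goes negative.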
+ ignore("approx quantile 4: test for Int overflow") { + val res = spark.range(3000000000L).stat.approxQuantile("id", Array(0.8, 0.9), 0.05) + assert(res(0) > 2200000000.0) + assert(res(1) > 2200000000.0) + } + test("crosstab") { withSQLConf(SQLConf.SUPPORT_QUOTED_REGEX_COLUMN_NAME.key -> "false") { val rng = new Random() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index c9d2cf5b45ce1..1896697550ae7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -359,6 +359,63 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { testData.select('key).collect().toSeq) } + test("repartition with SortOrder") { + // passing SortOrder expressions to .repartition() should result in an informative error + + def checkSortOrderErrorMsg[T](data: => Dataset[T]): Unit = { + val ex = intercept[IllegalArgumentException](data) + assert(ex.getMessage.contains("repartitionByRange")) + } + + checkSortOrderErrorMsg { + Seq(0).toDF("a").repartition(2, $"a".asc) + } + + checkSortOrderErrorMsg { + Seq((0, 0)).toDF("a", "b").repartition(2, $"a".asc, $"b") + } + } + + test("repartitionByRange") { + val data1d = Random.shuffle(0.to(9)) + val data2d = data1d.map(i => (i, data1d.size - i)) + + checkAnswer( + data1d.toDF("val").repartitionByRange(data1d.size, $"val".asc) + .select(spark_partition_id().as("id"), $"val"), + data1d.map(i => Row(i, i))) + + checkAnswer( + data1d.toDF("val").repartitionByRange(data1d.size, $"val".desc) + .select(spark_partition_id().as("id"), $"val"), + data1d.map(i => Row(i, data1d.size - 1 - i))) + + checkAnswer( + data1d.toDF("val").repartitionByRange(data1d.size, lit(42)) + .select(spark_partition_id().as("id"), $"val"), + data1d.map(i => Row(0, i))) + + checkAnswer( + data1d.toDF("val").repartitionByRange(data1d.size, lit(null), $"val".asc, rand()) + .select(spark_partition_id().as("id"), $"val"), + data1d.map(i => Row(i, i))) + + // .repartitionByRange() assumes .asc by default if no explicit sort order is specified + checkAnswer( + data2d.toDF("a", "b").repartitionByRange(data2d.size, $"a".desc, $"b") + .select(spark_partition_id().as("id"), $"a", $"b"), + data2d.toDF("a", "b").repartitionByRange(data2d.size, $"a".desc, $"b".asc) + .select(spark_partition_id().as("id"), $"a", $"b")) + + // at least one partition-by expression must be specified + intercept[IllegalArgumentException] { + data1d.toDF("val").repartitionByRange(data1d.size) + } + intercept[IllegalArgumentException] { + data1d.toDF("val").repartitionByRange(data1d.size, Seq.empty: _*) + } + } + test("coalesce") { intercept[IllegalArgumentException] { testData.select('key).coalesce(0) @@ -1199,6 +1256,34 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { assert(testData.select($"*").showString(1, vertical = true) === expectedAnswer) } + test("SPARK-23023 Cast rows to strings in showString") { + val df1 = Seq(Seq(1, 2, 3, 4)).toDF("a") + assert(df1.showString(10) === + s"""+------------+ + || a| + |+------------+ + ||[1, 2, 3, 4]| + |+------------+ + |""".stripMargin) + val df2 = Seq(Map(1 -> "a", 2 -> "b")).toDF("a") + assert(df2.showString(10) === + s"""+----------------+ + || a| + |+----------------+ + ||[1 -> a, 2 -> b]| + |+----------------+ + |""".stripMargin) + val df3 = Seq(((1, "a"), 0), ((2, "b"), 0)).toDF("a", "b") + assert(df3.showString(10) === + s"""+------+---+ + || a| b| + |+------+---+ + 
||[1, a]| 0| + ||[2, b]| 0| + |+------+---+ + |""".stripMargin) + } + test("SPARK-7327 show with empty dataFrame") { val expectedAnswer = """+---+-----+ ||key|value| diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index ea725af8d1ad8..01c988ecc3726 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql import java.sql.{Date, Timestamp} +import org.apache.spark.TestUtils.{assertNotSpilled, assertSpilled} import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction, Window} import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf @@ -518,9 +519,46 @@ class DataFrameWindowFunctionsSuite extends QueryTest with SharedSQLContext { Seq(Row(3, "1", null, 3.0, 4.0, 3.0), Row(5, "1", false, 4.0, 5.0, 5.0))) } + test("Window spill with less than the inMemoryThreshold") { + val df = Seq((1, "1"), (2, "2"), (1, "3"), (2, "4")).toDF("key", "value") + val window = Window.partitionBy($"key").orderBy($"value") + + withSQLConf(SQLConf.WINDOW_EXEC_BUFFER_IN_MEMORY_THRESHOLD.key -> "2", + SQLConf.WINDOW_EXEC_BUFFER_SPILL_THRESHOLD.key -> "2") { + assertNotSpilled(sparkContext, "select") { + df.select($"key", sum("value").over(window)).collect() + } + } + } + + test("Window spill with more than the inMemoryThreshold but less than the spillThreshold") { + val df = Seq((1, "1"), (2, "2"), (1, "3"), (2, "4")).toDF("key", "value") + val window = Window.partitionBy($"key").orderBy($"value") + + withSQLConf(SQLConf.WINDOW_EXEC_BUFFER_IN_MEMORY_THRESHOLD.key -> "1", + SQLConf.WINDOW_EXEC_BUFFER_SPILL_THRESHOLD.key -> "2") { + assertNotSpilled(sparkContext, "select") { + df.select($"key", sum("value").over(window)).collect() + } + } + } + + test("Window spill with more than the inMemoryThreshold and spillThreshold") { + val df = Seq((1, "1"), (2, "2"), (1, "3"), (2, "4")).toDF("key", "value") + val window = Window.partitionBy($"key").orderBy($"value") + + withSQLConf(SQLConf.WINDOW_EXEC_BUFFER_IN_MEMORY_THRESHOLD.key -> "1", + SQLConf.WINDOW_EXEC_BUFFER_SPILL_THRESHOLD.key -> "1") { + assertSpilled(sparkContext, "select") { + df.select($"key", sum("value").over(window)).collect() + } + } + } + test("SPARK-21258: complex object in combination with spilling") { // Make sure we trigger the spilling path. - withSQLConf(SQLConf.WINDOW_EXEC_BUFFER_SPILL_THRESHOLD.key -> "17") { + withSQLConf(SQLConf.WINDOW_EXEC_BUFFER_IN_MEMORY_THRESHOLD.key -> "1", + SQLConf.WINDOW_EXEC_BUFFER_SPILL_THRESHOLD.key -> "17") { val sampleSchema = new StructType(). add("f0", StringType). add("f1", LongType). 
@@ -558,7 +596,9 @@ class DataFrameWindowFunctionsSuite extends QueryTest with SharedSQLContext { import testImplicits._ - spark.read.schema(sampleSchema).json(input.toDS()).select(c0, c1).foreach { _ => () } + assertSpilled(sparkContext, "select") { + spark.read.schema(sampleSchema).json(input.toDS()).select(c0, c1).foreach { _ => () } + } } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index b02db7721aa7f..49c59cf695dc1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -660,7 +660,7 @@ class DatasetSuite extends QueryTest with SharedSQLContext { val e = intercept[AnalysisException] { df.as[KryoData] }.message - assert(e.contains("cannot cast IntegerType to BinaryType")) + assert(e.contains("cannot cast int to binary")) } test("Java encoder") { @@ -958,12 +958,12 @@ class DatasetSuite extends QueryTest with SharedSQLContext { ).toDS() val expected = - """+-------+ - || f| - |+-------+ - ||[foo,1]| - ||[bar,2]| - |+-------+ + """+--------+ + || f| + |+--------+ + ||[foo, 1]| + ||[bar, 2]| + |+--------+ |""".stripMargin checkShowString(ds, expected) @@ -1156,67 +1156,82 @@ class DatasetSuite extends QueryTest with SharedSQLContext { } Seq(true, false).foreach { eager => - def testCheckpointing(testName: String)(f: => Unit): Unit = { - test(s"Dataset.checkpoint() - $testName (eager = $eager)") { - withTempDir { dir => - val originalCheckpointDir = spark.sparkContext.checkpointDir - - try { - spark.sparkContext.setCheckpointDir(dir.getCanonicalPath) + Seq(true, false).foreach { reliable => + def testCheckpointing(testName: String)(f: => Unit): Unit = { + test(s"Dataset.checkpoint() - $testName (eager = $eager, reliable = $reliable)") { + if (reliable) { + withTempDir { dir => + val originalCheckpointDir = spark.sparkContext.checkpointDir + + try { + spark.sparkContext.setCheckpointDir(dir.getCanonicalPath) + f + } finally { + // Since the original checkpointDir can be None, we need + // to set the variable directly. + spark.sparkContext.checkpointDir = originalCheckpointDir + } + } + } else { + // Local checkpoints don't require checkpoint_dir f - } finally { - // Since the original checkpointDir can be None, we need - // to set the variable directly.
- spark.sparkContext.checkpointDir = originalCheckpointDir } } } - } - testCheckpointing("basic") { - val ds = spark.range(10).repartition('id % 2).filter('id > 5).orderBy('id.desc) - val cp = ds.checkpoint(eager) + testCheckpointing("basic") { + val ds = spark.range(10).repartition('id % 2).filter('id > 5).orderBy('id.desc) + val cp = if (reliable) ds.checkpoint(eager) else ds.localCheckpoint(eager) - val logicalRDD = cp.logicalPlan match { - case plan: LogicalRDD => plan - case _ => - val treeString = cp.logicalPlan.treeString(verbose = true) - fail(s"Expecting a LogicalRDD, but got\n$treeString") - } + val logicalRDD = cp.logicalPlan match { + case plan: LogicalRDD => plan + case _ => + val treeString = cp.logicalPlan.treeString(verbose = true) + fail(s"Expecting a LogicalRDD, but got\n$treeString") + } - val dsPhysicalPlan = ds.queryExecution.executedPlan - val cpPhysicalPlan = cp.queryExecution.executedPlan + val dsPhysicalPlan = ds.queryExecution.executedPlan + val cpPhysicalPlan = cp.queryExecution.executedPlan - assertResult(dsPhysicalPlan.outputPartitioning) { logicalRDD.outputPartitioning } - assertResult(dsPhysicalPlan.outputOrdering) { logicalRDD.outputOrdering } + assertResult(dsPhysicalPlan.outputPartitioning) { + logicalRDD.outputPartitioning + } + assertResult(dsPhysicalPlan.outputOrdering) { + logicalRDD.outputOrdering + } - assertResult(dsPhysicalPlan.outputPartitioning) { cpPhysicalPlan.outputPartitioning } - assertResult(dsPhysicalPlan.outputOrdering) { cpPhysicalPlan.outputOrdering } + assertResult(dsPhysicalPlan.outputPartitioning) { + cpPhysicalPlan.outputPartitioning + } + assertResult(dsPhysicalPlan.outputOrdering) { + cpPhysicalPlan.outputOrdering + } - // For a lazy checkpoint() call, the first check also materializes the checkpoint. - checkDataset(cp, (9L to 6L by -1L).map(java.lang.Long.valueOf): _*) + // For a lazy checkpoint() call, the first check also materializes the checkpoint. + checkDataset(cp, (9L to 6L by -1L).map(java.lang.Long.valueOf): _*) - // Reads back from checkpointed data and check again. - checkDataset(cp, (9L to 6L by -1L).map(java.lang.Long.valueOf): _*) - } + // Reads back from checkpointed data and check again. 
+ checkDataset(cp, (9L to 6L by -1L).map(java.lang.Long.valueOf): _*) + } - testCheckpointing("should preserve partitioning information") { - val ds = spark.range(10).repartition('id % 2) - val cp = ds.checkpoint(eager) + testCheckpointing("should preserve partitioning information") { + val ds = spark.range(10).repartition('id % 2) + val cp = if (reliable) ds.checkpoint(eager) else ds.localCheckpoint(eager) - val agg = cp.groupBy('id % 2).agg(count('id)) + val agg = cp.groupBy('id % 2).agg(count('id)) - agg.queryExecution.executedPlan.collectFirst { - case ShuffleExchangeExec(_, _: RDDScanExec, _) => - case BroadcastExchangeExec(_, _: RDDScanExec) => - }.foreach { _ => - fail( - "No Exchange should be inserted above RDDScanExec since the checkpointed Dataset " + - "preserves partitioning information:\n\n" + agg.queryExecution - ) - } + agg.queryExecution.executedPlan.collectFirst { + case ShuffleExchangeExec(_, _: RDDScanExec, _) => + case BroadcastExchangeExec(_, _: RDDScanExec) => + }.foreach { _ => + fail( + "No Exchange should be inserted above RDDScanExec since the checkpointed Dataset " + + "preserves partitioning information:\n\n" + agg.queryExecution + ) + } - checkAnswer(agg, ds.groupBy('id % 2).agg(count('id))) + checkAnswer(agg, ds.groupBy('id % 2).agg(count('id))) + } } } @@ -1426,6 +1441,11 @@ class DatasetSuite extends QueryTest with SharedSQLContext { assert(e.getCause.isInstanceOf[NullPointerException]) } } + + test("SPARK-23025: Add support for null type in scala reflection") { + val data = Seq(("a", null)) + checkDataset(data.toDS(), data: _*) + } } case class SingleData(id: Int) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala index 3a8694839bb24..6bbf38516cdf6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala @@ -435,6 +435,52 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext { Seq(Row(Date.valueOf("2015-07-01")), Row(Date.valueOf("2014-12-01")))) } + test("function date_trunc") { + val df = Seq( + (1, Timestamp.valueOf("2015-07-22 10:01:40.523")), + (2, Timestamp.valueOf("2014-12-31 05:29:06.876"))).toDF("i", "t") + + checkAnswer( + df.select(date_trunc("YY", col("t"))), + Seq(Row(Timestamp.valueOf("2015-01-01 00:00:00")), + Row(Timestamp.valueOf("2014-01-01 00:00:00")))) + + checkAnswer( + df.selectExpr("date_trunc('MONTH', t)"), + Seq(Row(Timestamp.valueOf("2015-07-01 00:00:00")), + Row(Timestamp.valueOf("2014-12-01 00:00:00")))) + + checkAnswer( + df.selectExpr("date_trunc('DAY', t)"), + Seq(Row(Timestamp.valueOf("2015-07-22 00:00:00")), + Row(Timestamp.valueOf("2014-12-31 00:00:00")))) + + checkAnswer( + df.selectExpr("date_trunc('HOUR', t)"), + Seq(Row(Timestamp.valueOf("2015-07-22 10:00:00")), + Row(Timestamp.valueOf("2014-12-31 05:00:00")))) + + checkAnswer( + df.selectExpr("date_trunc('MINUTE', t)"), + Seq(Row(Timestamp.valueOf("2015-07-22 10:01:00")), + Row(Timestamp.valueOf("2014-12-31 05:29:00")))) + + checkAnswer( + df.selectExpr("date_trunc('SECOND', t)"), + Seq(Row(Timestamp.valueOf("2015-07-22 10:01:40")), + Row(Timestamp.valueOf("2014-12-31 05:29:06")))) + + checkAnswer( + df.selectExpr("date_trunc('WEEK', t)"), + Seq(Row(Timestamp.valueOf("2015-07-20 00:00:00")), + Row(Timestamp.valueOf("2014-12-29 00:00:00")))) + + checkAnswer( + df.selectExpr("date_trunc('QUARTER', t)"), + Seq(Row(Timestamp.valueOf("2015-07-01 
00:00:00")), + Row(Timestamp.valueOf("2014-10-01 00:00:00")))) + } + test("from_unixtime") { val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala index 6b98209fd49b8..109fcf90a3ec9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala @@ -65,7 +65,7 @@ class GeneratorFunctionSuite extends QueryTest with SharedSQLContext { val m3 = intercept[AnalysisException] { df.selectExpr("stack(2, 1, '2.2')") }.getMessage - assert(m3.contains("data type mismatch: Argument 1 (IntegerType) != Argument 2 (StringType)")) + assert(m3.contains("data type mismatch: Argument 1 (int) != Argument 2 (string)")) // stack on column data val df2 = Seq((2, 1, 2, 3)).toDF("n", "a", "b", "c") @@ -80,7 +80,7 @@ class GeneratorFunctionSuite extends QueryTest with SharedSQLContext { val m5 = intercept[AnalysisException] { df3.selectExpr("stack(2, a, b)") }.getMessage - assert(m5.contains("data type mismatch: Argument 1 (IntegerType) != Argument 2 (DoubleType)")) + assert(m5.contains("data type mismatch: Argument 1 (int) != Argument 2 (double)")) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala index fcaca3d75b74f..9fb8be423614b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala @@ -297,31 +297,47 @@ object QueryTest { }) } + private def genError( + expectedAnswer: Seq[Row], + sparkAnswer: Seq[Row], + isSorted: Boolean = false): String = { + val getRowType: Option[Row] => String = row => + row.map(row => + if (row.schema == null) { + "struct<>" + } else { + s"${row.schema.catalogString}" + }).getOrElse("struct<>") + + s""" + |== Results == + |${ + sideBySide( + s"== Correct Answer - ${expectedAnswer.size} ==" +: + getRowType(expectedAnswer.headOption) +: + prepareAnswer(expectedAnswer, isSorted).map(_.toString()), + s"== Spark Answer - ${sparkAnswer.size} ==" +: + getRowType(sparkAnswer.headOption) +: + prepareAnswer(sparkAnswer, isSorted).map(_.toString())).mkString("\n") + } + """.stripMargin + } + + def includesRows( + expectedRows: Seq[Row], + sparkAnswer: Seq[Row]): Option[String] = { + if (!prepareAnswer(expectedRows, true).toSet.subsetOf(prepareAnswer(sparkAnswer, true).toSet)) { + return Some(genError(expectedRows, sparkAnswer, true)) + } + None + } + def sameRows( expectedAnswer: Seq[Row], sparkAnswer: Seq[Row], isSorted: Boolean = false): Option[String] = { if (prepareAnswer(expectedAnswer, isSorted) != prepareAnswer(sparkAnswer, isSorted)) { - val getRowType: Option[Row] => String = row => - row.map(row => - if (row.schema == null) { - "struct<>" - } else { - s"${row.schema.catalogString}" - }).getOrElse("struct<>") - - val errorMessage = - s""" - |== Results == - |${sideBySide( - s"== Correct Answer - ${expectedAnswer.size} ==" +: - getRowType(expectedAnswer.headOption) +: - prepareAnswer(expectedAnswer, isSorted).map(_.toString()), - s"== Spark Answer - ${sparkAnswer.size} ==" +: - getRowType(sparkAnswer.headOption) +: - prepareAnswer(sparkAnswer, isSorted).map(_.toString())).mkString("\n")} - """.stripMargin - return Some(errorMessage) + return Some(genError(expectedAnswer, sparkAnswer, isSorted)) } None } diff 
--git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 5d0bba69daca1..96bf65fce9c4a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -1664,7 +1664,7 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
     e = intercept[AnalysisException] {
       sql(s"select id from `org.apache.spark.sql.hive.orc`.`file_path`")
     }
-    assert(e.message.contains("The ORC data source must be used with Hive support enabled"))
+    assert(e.message.contains("Hive built-in ORC data source must be used with Hive support"))
     e = intercept[AnalysisException] {
       sql(s"select id from `com.databricks.spark.avro`.`file_path`")
@@ -2757,4 +2757,20 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
       }
     }
   }
+
+  // Only the new OrcFileFormat supports this
+  Seq(classOf[org.apache.spark.sql.execution.datasources.orc.OrcFileFormat].getCanonicalName,
+    "parquet").foreach { format =>
+    test(s"SPARK-15474 Write and read back non-empty schema with empty dataframe - $format") {
+      withTempPath { file =>
+        val path = file.getCanonicalPath
+        val emptyDf = Seq((true, 1, "str")).toDF.limit(0)
+        emptyDf.write.format(format).save(path)
+
+        val df = spark.read.format(format).load(path)
+        assert(df.schema.sameType(emptyDf.schema))
+        checkAnswer(df, emptyDf)
+      }
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala
new file mode 100644
index 0000000000000..9a0c61b3304c5
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.apache.spark.sql.catalyst.util.resourceToString
+
+/**
+ * This test suite ensures all the Star Schema Benchmark queries can be successfully analyzed,
+ * optimized and compiled without hitting the max iteration threshold.
+ */
+class SSBQuerySuite extends BenchmarkQueryTest {
+
+  override def beforeAll() {
+    super.beforeAll()
+
+    sql(
+      """
+        |CREATE TABLE `part` (`p_partkey` INT, `p_name` STRING, `p_mfgr` STRING,
+        |`p_category` STRING, `p_brand1` STRING, `p_color` STRING, `p_type` STRING, `p_size` INT,
+        |`p_container` STRING)
+        |USING parquet
+      """.stripMargin)
+
+    sql(
+      """
+        |CREATE TABLE `supplier` (`s_suppkey` INT, `s_name` STRING, `s_address` STRING,
+        |`s_city` STRING, `s_nation` STRING, `s_region` STRING, `s_phone` STRING)
+        |USING parquet
+      """.stripMargin)
+
+    sql(
+      """
+        |CREATE TABLE `customer` (`c_custkey` INT, `c_name` STRING, `c_address` STRING,
+        |`c_city` STRING, `c_nation` STRING, `c_region` STRING, `c_phone` STRING,
+        |`c_mktsegment` STRING)
+        |USING parquet
+      """.stripMargin)
+
+    sql(
+      """
+        |CREATE TABLE `date` (`d_datekey` INT, `d_date` STRING, `d_dayofweek` STRING,
+        |`d_month` STRING, `d_year` INT, `d_yearmonthnum` INT, `d_yearmonth` STRING,
+        |`d_daynuminweek` INT, `d_daynuminmonth` INT, `d_daynuminyear` INT, `d_monthnuminyear` INT,
+        |`d_weeknuminyear` INT, `d_sellingseason` STRING, `d_lastdayinweekfl` STRING,
+        |`d_lastdayinmonthfl` STRING, `d_holidayfl` STRING, `d_weekdayfl` STRING)
+        |USING parquet
+      """.stripMargin)
+
+    sql(
+      """
+        |CREATE TABLE `lineorder` (`lo_orderkey` INT, `lo_linenumber` INT, `lo_custkey` INT,
+        |`lo_partkey` INT, `lo_suppkey` INT, `lo_orderdate` INT, `lo_orderpriority` STRING,
+        |`lo_shippriority` STRING, `lo_quantity` INT, `lo_extendedprice` INT,
+        |`lo_ordertotalprice` INT, `lo_discount` INT, `lo_revenue` INT, `lo_supplycost` INT,
+        |`lo_tax` INT, `lo_commitdate` INT, `lo_shipmode` STRING)
+        |USING parquet
+      """.stripMargin)
+  }
+
+  val ssbQueries = Seq(
+    "1.1", "1.2", "1.3", "2.1", "2.2", "2.3", "3.1", "3.2", "3.3", "3.4", "4.1", "4.2", "4.3")
+
+  ssbQueries.foreach { name =>
+    val queryString = resourceToString(s"ssb/$name.sql",
+      classLoader = Thread.currentThread.getContextClassLoader)
+    test(name) {
+      // check the plans can be properly generated
+      val plan = sql(queryString).queryExecution.executedPlan
+      checkGeneratedCode(plan)
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala
index fba5d2652d3f5..b11e798532056 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala
@@ -85,13 +85,24 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared
   test("analyze empty table") {
     val table = "emptyTable"
     withTable(table) {
-      sql(s"CREATE TABLE $table (key STRING, value STRING) USING PARQUET")
+      val df = Seq.empty[Int].toDF("key")
+      df.write.format("json").saveAsTable(table)
       sql(s"ANALYZE TABLE $table COMPUTE STATISTICS noscan")
       val fetchedStats1 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = None)
       assert(fetchedStats1.get.sizeInBytes == 0)
       sql(s"ANALYZE TABLE $table COMPUTE STATISTICS")
       val fetchedStats2 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = Some(0))
       assert(fetchedStats2.get.sizeInBytes == 0)
+
+      val expectedColStat =
+        "key" -> ColumnStat(0, None, None, 0, IntegerType.defaultSize, IntegerType.defaultSize)
+
+      // There won't be a histogram for an empty column.
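// Aside, a hedged sketch of the behavior asserted here, as seen from plain SQL in a
// spark-shell session; the table name `t` and column name `key` are illustrative only
// and not part of this patch.
// Even with histograms enabled, ANALYZE stores no histogram for a column with no rows.
spark.conf.set("spark.sql.statistics.histogram.enabled", "true")
spark.sql("CREATE TABLE t (key INT) USING parquet")
spark.sql("ANALYZE TABLE t COMPUTE STATISTICS FOR COLUMNS key")
spark.sql("DESC EXTENDED t key").show(truncate = false)  // histogram field stays empty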
+ Seq("true", "false").foreach { histogramEnabled => + withSQLConf(SQLConf.HISTOGRAM_ENABLED.key -> histogramEnabled) { + checkColStats(df, mutable.LinkedHashMap(expectedColStat)) + } + } } } @@ -178,7 +189,13 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared val expectedColStats = dataTypes.map { case (tpe, idx) => (s"col$idx", ColumnStat(0, None, None, 1, tpe.defaultSize.toLong, tpe.defaultSize.toLong)) } - checkColStats(df, mutable.LinkedHashMap(expectedColStats: _*)) + + // There won't be histograms for null columns. + Seq("true", "false").foreach { histogramEnabled => + withSQLConf(SQLConf.HISTOGRAM_ENABLED.key -> histogramEnabled) { + checkColStats(df, mutable.LinkedHashMap(expectedColStats: _*)) + } + } } test("number format in statistics") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala index e47d4b0ee25d4..1a584187a06e5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala @@ -17,36 +17,18 @@ package org.apache.spark.sql -import org.scalatest.BeforeAndAfterAll - import org.apache.spark.sql.catalyst.util.resourceToString import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SharedSQLContext -import org.apache.spark.util.Utils /** - * This test suite ensures all the TPC-DS queries can be successfully analyzed and optimized - * without hitting the max iteration threshold. + * This test suite ensures all the TPC-DS queries can be successfully analyzed, optimized + * and compiled without hitting the max iteration threshold. */ -class TPCDSQuerySuite extends QueryTest with SharedSQLContext with BeforeAndAfterAll { - - // When Utils.isTesting is true, the RuleExecutor will issue an exception when hitting - // the max iteration of analyzer/optimizer batches. 
- assert(Utils.isTesting, "spark.testing is not set to true") - - /** - * Drop all the tables - */ - protected override def afterAll(): Unit = { - try { - spark.sessionState.catalog.reset() - } finally { - super.afterAll() - } - } +class TPCDSQuerySuite extends BenchmarkQueryTest { override def beforeAll() { super.beforeAll() + sql( """ |CREATE TABLE `catalog_page` ( @@ -350,8 +332,9 @@ class TPCDSQuerySuite extends QueryTest with SharedSQLContext with BeforeAndAfte classLoader = Thread.currentThread().getContextClassLoader) test(name) { withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") { - // Just check the plans can be properly generated - sql(queryString).queryExecution.executedPlan + // check the plans can be properly generated + val plan = sql(queryString).queryExecution.executedPlan + checkGeneratedCode(plan) } } } @@ -365,8 +348,9 @@ class TPCDSQuerySuite extends QueryTest with SharedSQLContext with BeforeAndAfte val queryString = resourceToString(s"tpcds-modifiedQueries/$name.sql", classLoader = Thread.currentThread().getContextClassLoader) test(s"modified-$name") { - // Just check the plans can be properly generated - sql(queryString).queryExecution.executedPlan + // check the plans can be properly generated + val plan = sql(queryString).queryExecution.executedPlan + checkGeneratedCode(plan) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala new file mode 100644 index 0000000000000..e3e700529bba7 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.util.resourceToString + +/** + * This test suite ensures all the TPC-H queries can be successfully analyzed, optimized + * and compiled without hitting the max iteration threshold. 
+ */ +class TPCHQuerySuite extends BenchmarkQueryTest { + + override def beforeAll() { + super.beforeAll() + + sql( + """ + |CREATE TABLE `orders` ( + |`o_orderkey` BIGINT, `o_custkey` BIGINT, `o_orderstatus` STRING, + |`o_totalprice` DECIMAL(10,0), `o_orderdate` DATE, `o_orderpriority` STRING, + |`o_clerk` STRING, `o_shippriority` INT, `o_comment` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `nation` ( + |`n_nationkey` BIGINT, `n_name` STRING, `n_regionkey` BIGINT, `n_comment` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `region` ( + |`r_regionkey` BIGINT, `r_name` STRING, `r_comment` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `part` (`p_partkey` BIGINT, `p_name` STRING, `p_mfgr` STRING, + |`p_brand` STRING, `p_type` STRING, `p_size` INT, `p_container` STRING, + |`p_retailprice` DECIMAL(10,0), `p_comment` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `partsupp` (`ps_partkey` BIGINT, `ps_suppkey` BIGINT, + |`ps_availqty` INT, `ps_supplycost` DECIMAL(10,0), `ps_comment` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `customer` (`c_custkey` BIGINT, `c_name` STRING, `c_address` STRING, + |`c_nationkey` STRING, `c_phone` STRING, `c_acctbal` DECIMAL(10,0), + |`c_mktsegment` STRING, `c_comment` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `supplier` (`s_suppkey` BIGINT, `s_name` STRING, `s_address` STRING, + |`s_nationkey` BIGINT, `s_phone` STRING, `s_acctbal` DECIMAL(10,0), `s_comment` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `lineitem` (`l_orderkey` BIGINT, `l_partkey` BIGINT, `l_suppkey` BIGINT, + |`l_linenumber` INT, `l_quantity` DECIMAL(10,0), `l_extendedprice` DECIMAL(10,0), + |`l_discount` DECIMAL(10,0), `l_tax` DECIMAL(10,0), `l_returnflag` STRING, + |`l_linestatus` STRING, `l_shipdate` DATE, `l_commitdate` DATE, `l_receiptdate` DATE, + |`l_shipinstruct` STRING, `l_shipmode` STRING, `l_comment` STRING) + |USING parquet + """.stripMargin) + } + + val tpchQueries = Seq( + "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15", "q16", "q17", "q18", "q19", "q20", "q21", "q22") + + tpchQueries.foreach { name => + val queryString = resourceToString(s"tpch/$name.sql", + classLoader = Thread.currentThread().getContextClassLoader) + test(name) { + // check the plans can be properly generated + val plan = sql(queryString).queryExecution.executedPlan + checkGeneratedCode(plan) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala index 7f1c009ca6e7a..db37be68e42e6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala @@ -17,12 +17,13 @@ package org.apache.spark.sql +import org.apache.spark.sql.api.java._ import org.apache.spark.sql.catalyst.plans.logical.Project import org.apache.spark.sql.execution.command.ExplainCommand -import org.apache.spark.sql.functions.{col, udf} +import org.apache.spark.sql.functions.udf import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.test.SQLTestData._ -import org.apache.spark.sql.types.DataTypes +import org.apache.spark.sql.types.{DataTypes, DoubleType} private case class FunctionResult(f1: String, f2: String) @@ -128,6 +129,13 @@ class UDFSuite extends QueryTest with SharedSQLContext { val df2 = 
testData.select(bar()) assert(df2.logicalPlan.asInstanceOf[Project].projectList.forall(!_.deterministic)) assert(df2.head().getDouble(0) >= 0.0) + + val javaUdf = udf(new UDF0[Double] { + override def call(): Double = Math.random() + }, DoubleType).asNondeterministic() + val df3 = testData.select(javaUdf()) + assert(df3.logicalPlan.asInstanceOf[Project].projectList.forall(!_.deterministic)) + assert(df3.head().getDouble(0) >= 0.0) } test("TwoArgument UDF") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala index a08433ba794d9..cc8b600efa46a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala @@ -21,7 +21,7 @@ import scala.beans.{BeanInfo, BeanProperty} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} -import org.apache.spark.sql.catalyst.expressions.GenericInternalRow +import org.apache.spark.sql.catalyst.expressions.{Cast, ExpressionEvalHelper, GenericInternalRow, Literal} import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData} import org.apache.spark.sql.execution.datasources.parquet.ParquetTest import org.apache.spark.sql.functions._ @@ -44,6 +44,8 @@ object UDT { case v: MyDenseVector => java.util.Arrays.equals(this.data, v.data) case _ => false } + + override def toString: String = data.mkString("(", ", ", ")") } private[sql] class MyDenseVectorUDT extends UserDefinedType[MyDenseVector] { @@ -143,7 +145,8 @@ private[spark] class ExampleSubTypeUDT extends UserDefinedType[IExampleSubType] override def userClass: Class[IExampleSubType] = classOf[IExampleSubType] } -class UserDefinedTypeSuite extends QueryTest with SharedSQLContext with ParquetTest { +class UserDefinedTypeSuite extends QueryTest with SharedSQLContext with ParquetTest + with ExpressionEvalHelper { import testImplicits._ private lazy val pointsRDD = Seq( @@ -304,4 +307,12 @@ class UserDefinedTypeSuite extends QueryTest with SharedSQLContext with ParquetT pointsRDD.except(pointsRDD2), Seq(Row(0.0, new UDT.MyDenseVector(Array(0.2, 2.0))))) } + + test("SPARK-23054 Cast UserDefinedType to string") { + val udt = new UDT.MyDenseVectorUDT() + val vector = new UDT.MyDenseVector(Array(1.0, 3.0, 5.0, 7.0, 9.0)) + val data = udt.serialize(vector) + val ret = Cast(Literal(data, udt), StringType, None) + checkEvaluation(ret, "(1.0, 3.0, 5.0, 7.0, 9.0)") + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala index 423e1288e8dcb..c8d045a32d73c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala @@ -20,6 +20,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.SparkConf import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext /** @@ -52,4 +53,34 @@ class DataSourceScanExecRedactionSuite extends QueryTest with SharedSQLContext { assert(df.queryExecution.simpleString.contains(replacement)) } } + + private def isIncluded(queryExecution: QueryExecution, msg: String): Boolean = { + queryExecution.toString.contains(msg) || + queryExecution.simpleString.contains(msg) || 
+ queryExecution.stringWithStats.contains(msg) + } + + test("explain is redacted using SQLConf") { + withTempDir { dir => + val basePath = dir.getCanonicalPath + spark.range(0, 10).toDF("a").write.parquet(new Path(basePath, "foo=1").toString) + val df = spark.read.parquet(basePath) + val replacement = "*********" + + // Respect SparkConf and replace file:/ + assert(isIncluded(df.queryExecution, replacement)) + + assert(isIncluded(df.queryExecution, "FileScan")) + assert(!isIncluded(df.queryExecution, "file:/")) + + withSQLConf(SQLConf.SQL_STRING_REDACTION_PATTERN.key -> "(?i)FileScan") { + // Respect SQLConf and replace FileScan + assert(isIncluded(df.queryExecution, replacement)) + + assert(!isIncluded(df.queryExecution, "FileScan")) + assert(isIncluded(df.queryExecution, "file:/")) + } + } + } + } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/GlobalTempViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/GlobalTempViewSuite.scala index cc943e0356f2a..dcc6fa6403f31 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/GlobalTempViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/GlobalTempViewSuite.scala @@ -36,7 +36,7 @@ class GlobalTempViewSuite extends QueryTest with SharedSQLContext { test("basic semantic") { val expectedErrorMsg = "not found" - try { + withGlobalTempView("src") { sql("CREATE GLOBAL TEMP VIEW src AS SELECT 1, 'a'") // If there is no database in table name, we should try local temp view first, if not found, @@ -79,19 +79,15 @@ class GlobalTempViewSuite extends QueryTest with SharedSQLContext { // We can also use Dataset API to replace global temp view Seq(2 -> "b").toDF("i", "j").createOrReplaceGlobalTempView("src") checkAnswer(spark.table(s"$globalTempDB.src"), Row(2, "b")) - } finally { - spark.catalog.dropGlobalTempView("src") } } test("global temp view is shared among all sessions") { - try { + withGlobalTempView("src") { sql("CREATE GLOBAL TEMP VIEW src AS SELECT 1, 2") checkAnswer(spark.table(s"$globalTempDB.src"), Row(1, 2)) val newSession = spark.newSession() checkAnswer(newSession.table(s"$globalTempDB.src"), Row(1, 2)) - } finally { - spark.catalog.dropGlobalTempView("src") } } @@ -105,27 +101,25 @@ class GlobalTempViewSuite extends QueryTest with SharedSQLContext { test("CREATE GLOBAL TEMP VIEW USING") { withTempPath { path => - try { + withGlobalTempView("src") { Seq(1 -> "a").toDF("i", "j").write.parquet(path.getAbsolutePath) sql(s"CREATE GLOBAL TEMP VIEW src USING parquet OPTIONS (PATH '${path.toURI}')") checkAnswer(spark.table(s"$globalTempDB.src"), Row(1, "a")) sql(s"INSERT INTO $globalTempDB.src SELECT 2, 'b'") checkAnswer(spark.table(s"$globalTempDB.src"), Row(1, "a") :: Row(2, "b") :: Nil) - } finally { - spark.catalog.dropGlobalTempView("src") } } } test("CREATE TABLE LIKE should work for global temp view") { - try { - sql("CREATE GLOBAL TEMP VIEW src AS SELECT 1 AS a, '2' AS b") - sql(s"CREATE TABLE cloned LIKE $globalTempDB.src") - val tableMeta = spark.sessionState.catalog.getTableMetadata(TableIdentifier("cloned")) - assert(tableMeta.schema == new StructType().add("a", "int", false).add("b", "string", false)) - } finally { - spark.catalog.dropGlobalTempView("src") - sql("DROP TABLE default.cloned") + withTable("cloned") { + withGlobalTempView("src") { + sql("CREATE GLOBAL TEMP VIEW src AS SELECT 1 AS a, '2' AS b") + sql(s"CREATE TABLE cloned LIKE $globalTempDB.src") + val tableMeta = spark.sessionState.catalog.getTableMetadata(TableIdentifier("cloned")) + 
assert(tableMeta.schema == new StructType() + .add("a", "int", false).add("b", "string", false)) + } } } @@ -146,26 +140,25 @@ class GlobalTempViewSuite extends QueryTest with SharedSQLContext { } test("should lookup global temp view if and only if global temp db is specified") { - try { - sql("CREATE GLOBAL TEMP VIEW same_name AS SELECT 3, 4") - sql("CREATE TEMP VIEW same_name AS SELECT 1, 2") + withTempView("same_name") { + withGlobalTempView("same_name") { + sql("CREATE GLOBAL TEMP VIEW same_name AS SELECT 3, 4") + sql("CREATE TEMP VIEW same_name AS SELECT 1, 2") - checkAnswer(sql("SELECT * FROM same_name"), Row(1, 2)) + checkAnswer(sql("SELECT * FROM same_name"), Row(1, 2)) - // we never lookup global temp views if database is not specified in table name - spark.catalog.dropTempView("same_name") - intercept[AnalysisException](sql("SELECT * FROM same_name")) + // we never lookup global temp views if database is not specified in table name + spark.catalog.dropTempView("same_name") + intercept[AnalysisException](sql("SELECT * FROM same_name")) - // Use qualified name to lookup a global temp view. - checkAnswer(sql(s"SELECT * FROM $globalTempDB.same_name"), Row(3, 4)) - } finally { - spark.catalog.dropTempView("same_name") - spark.catalog.dropGlobalTempView("same_name") + // Use qualified name to lookup a global temp view. + checkAnswer(sql(s"SELECT * FROM $globalTempDB.same_name"), Row(3, 4)) + } } } test("public Catalog should recognize global temp view") { - try { + withGlobalTempView("src") { sql("CREATE GLOBAL TEMP VIEW src AS SELECT 1, 2") assert(spark.catalog.tableExists(globalTempDB, "src")) @@ -175,8 +168,6 @@ class GlobalTempViewSuite extends QueryTest with SharedSQLContext { description = null, tableType = "TEMPORARY", isTemporary = true).toString) - } finally { - spark.catalog.dropGlobalTempView("src") } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index c25c90d0c70e2..f8b26f5b28cc7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -241,7 +241,7 @@ class PlannerSuite extends SharedSQLContext { test("collapse adjacent repartitions") { val doubleRepartitioned = testData.repartition(10).repartition(20).coalesce(5) def countRepartitions(plan: LogicalPlan): Int = plan.collect { case r: Repartition => r }.length - assert(countRepartitions(doubleRepartitioned.queryExecution.logical) === 3) + assert(countRepartitions(doubleRepartitioned.queryExecution.analyzed) === 3) assert(countRepartitions(doubleRepartitioned.queryExecution.optimizedPlan) === 2) doubleRepartitioned.queryExecution.optimizedPlan match { case Repartition (numPartitions, shuffle, Repartition(_, shuffleChild, _)) => @@ -260,11 +260,16 @@ class PlannerSuite extends SharedSQLContext { // do they satisfy the distribution requirements? As a result, we need at least four test cases. 
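The four cases referenced in this comment cross two independent questions per child: is its partitioning compatible with its siblings', and does it satisfy the required distribution. A minimal sketch of the second question, using the same catalyst classes this suite already exercises:

  import org.apache.spark.sql.catalyst.expressions.Literal
  import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, HashPartitioning}

  // Hash partitioning on the clustering expressions satisfies the clustered
  // distribution, so EnsureRequirements needs no exchange on that child ...
  val clustering = Literal(1) :: Nil
  val distribution = ClusteredDistribution(clustering)
  assert(HashPartitioning(clustering, 5).satisfies(distribution))
  // ... while partitioning on a different expression does not, so a
  // ShuffleExchangeExec must be inserted.
  assert(!HashPartitioning(Literal(2) :: Nil, 5).satisfies(distribution))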
private def assertDistributionRequirementsAreSatisfied(outputPlan: SparkPlan): Unit = {
-    if (outputPlan.children.length > 1
-        && outputPlan.requiredChildDistribution.toSet != Set(UnspecifiedDistribution)) {
-      val childPartitionings = outputPlan.children.map(_.outputPartitioning)
-      if (!Partitioning.allCompatible(childPartitionings)) {
-        fail(s"Partitionings are not compatible: $childPartitionings")
+    if (outputPlan.children.length > 1) {
+      val childPartitionings = outputPlan.children.zip(outputPlan.requiredChildDistribution)
+        .filter {
+          case (_, UnspecifiedDistribution) => false
+          case (_, _: BroadcastDistribution) => false
+          case _ => true
+        }.map(_._1.outputPartitioning)
+
+      if (childPartitionings.map(_.numPartitions).toSet.size > 1) {
+        fail(s"Partitionings don't have the same number of partitions: $childPartitionings")
+      }
     }
     outputPlan.children.zip(outputPlan.requiredChildDistribution).foreach {
@@ -274,40 +279,7 @@ class PlannerSuite extends SharedSQLContext {
     }
   }

-  test("EnsureRequirements with incompatible child partitionings which satisfy distribution") {
-    // Consider an operator that requires inputs that are clustered by two expressions (e.g.
-    // sort merge join where there are multiple columns in the equi-join condition)
-    val clusteringA = Literal(1) :: Nil
-    val clusteringB = Literal(2) :: Nil
-    val distribution = ClusteredDistribution(clusteringA ++ clusteringB)
-    // Say that the left and right inputs are each partitioned by _one_ of the two join columns:
-    val leftPartitioning = HashPartitioning(clusteringA, 1)
-    val rightPartitioning = HashPartitioning(clusteringB, 1)
-    // Individually, each input's partitioning satisfies the clustering distribution:
-    assert(leftPartitioning.satisfies(distribution))
-    assert(rightPartitioning.satisfies(distribution))
-    // However, these partitionings are not compatible with each other, so we still need to
-    // repartition both inputs prior to performing the join:
-    assert(!leftPartitioning.compatibleWith(rightPartitioning))
-    assert(!rightPartitioning.compatibleWith(leftPartitioning))
-    val inputPlan = DummySparkPlan(
-      children = Seq(
-        DummySparkPlan(outputPartitioning = leftPartitioning),
-        DummySparkPlan(outputPartitioning = rightPartitioning)
-      ),
-      requiredChildDistribution = Seq(distribution, distribution),
-      requiredChildOrdering = Seq(Seq.empty, Seq.empty)
-    )
-    val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(inputPlan)
-    assertDistributionRequirementsAreSatisfied(outputPlan)
-    if (outputPlan.collect { case e: ShuffleExchangeExec => true }.isEmpty) {
-      fail(s"Exchange should have been added:\n$outputPlan")
-    }
-  }
-
   test("EnsureRequirements with child partitionings with different numbers of output partitions") {
-    // This is similar to the previous test, except it checks that partitionings are not compatible
-    // unless they produce the same number of partitions.
val clustering = Literal(1) :: Nil val distribution = ClusteredDistribution(clustering) val inputPlan = DummySparkPlan( @@ -386,18 +358,15 @@ class PlannerSuite extends SharedSQLContext { } } - test("EnsureRequirements eliminates Exchange if child has Exchange with same partitioning") { + test("EnsureRequirements eliminates Exchange if child has same partitioning") { val distribution = ClusteredDistribution(Literal(1) :: Nil) - val finalPartitioning = HashPartitioning(Literal(1) :: Nil, 5) - val childPartitioning = HashPartitioning(Literal(2) :: Nil, 5) - assert(!childPartitioning.satisfies(distribution)) - val inputPlan = ShuffleExchangeExec(finalPartitioning, - DummySparkPlan( - children = DummySparkPlan(outputPartitioning = childPartitioning) :: Nil, - requiredChildDistribution = Seq(distribution), - requiredChildOrdering = Seq(Seq.empty)), - None) + val partitioning = HashPartitioning(Literal(1) :: Nil, 5) + assert(partitioning.satisfies(distribution)) + val inputPlan = ShuffleExchangeExec( + partitioning, + DummySparkPlan(outputPartitioning = partitioning), + None) val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(inputPlan) assertDistributionRequirementsAreSatisfied(outputPlan) if (outputPlan.collect { case e: ShuffleExchangeExec => true }.size == 2) { @@ -407,17 +376,13 @@ class PlannerSuite extends SharedSQLContext { test("EnsureRequirements does not eliminate Exchange with different partitioning") { val distribution = ClusteredDistribution(Literal(1) :: Nil) - // Number of partitions differ - val finalPartitioning = HashPartitioning(Literal(1) :: Nil, 8) - val childPartitioning = HashPartitioning(Literal(2) :: Nil, 5) - assert(!childPartitioning.satisfies(distribution)) - val inputPlan = ShuffleExchangeExec(finalPartitioning, - DummySparkPlan( - children = DummySparkPlan(outputPartitioning = childPartitioning) :: Nil, - requiredChildDistribution = Seq(distribution), - requiredChildOrdering = Seq(Seq.empty)), - None) + val partitioning = HashPartitioning(Literal(2) :: Nil, 5) + assert(!partitioning.satisfies(distribution)) + val inputPlan = ShuffleExchangeExec( + partitioning, + DummySparkPlan(outputPartitioning = partitioning), + None) val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(inputPlan) assertDistributionRequirementsAreSatisfied(outputPlan) if (outputPlan.collect { case e: ShuffleExchangeExec => true }.size == 1) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 08a4a21b20f61..14082197ba0bd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -69,21 +69,25 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { } test("create a permanent view on a temp view") { - withView("jtv1", "temp_jtv1", "global_temp_jtv1") { - sql("CREATE TEMPORARY VIEW temp_jtv1 AS SELECT * FROM jt WHERE id > 3") - var e = intercept[AnalysisException] { - sql("CREATE VIEW jtv1 AS SELECT * FROM temp_jtv1 WHERE id < 6") - }.getMessage - assert(e.contains("Not allowed to create a permanent view `jtv1` by " + - "referencing a temporary view `temp_jtv1`")) - - val globalTempDB = spark.sharedState.globalTempViewManager.database - sql("CREATE GLOBAL TEMP VIEW global_temp_jtv1 AS SELECT * FROM jt WHERE id > 0") - e = intercept[AnalysisException] { - sql(s"CREATE VIEW jtv1 AS SELECT * FROM $globalTempDB.global_temp_jtv1 WHERE 
id < 6") - }.getMessage - assert(e.contains(s"Not allowed to create a permanent view `jtv1` by referencing " + - s"a temporary view `global_temp`.`global_temp_jtv1`")) + withView("jtv1") { + withTempView("temp_jtv1") { + withGlobalTempView("global_temp_jtv1") { + sql("CREATE TEMPORARY VIEW temp_jtv1 AS SELECT * FROM jt WHERE id > 3") + var e = intercept[AnalysisException] { + sql("CREATE VIEW jtv1 AS SELECT * FROM temp_jtv1 WHERE id < 6") + }.getMessage + assert(e.contains("Not allowed to create a permanent view `jtv1` by " + + "referencing a temporary view `temp_jtv1`")) + + val globalTempDB = spark.sharedState.globalTempViewManager.database + sql("CREATE GLOBAL TEMP VIEW global_temp_jtv1 AS SELECT * FROM jt WHERE id > 0") + e = intercept[AnalysisException] { + sql(s"CREATE VIEW jtv1 AS SELECT * FROM $globalTempDB.global_temp_jtv1 WHERE id < 6") + }.getMessage + assert(e.contains(s"Not allowed to create a permanent view `jtv1` by referencing " + + s"a temporary view `global_temp`.`global_temp_jtv1`")) + } + } } } @@ -289,7 +293,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { sql("CREATE TEMPORARY VIEW testView AS SELECT id FROM jt") } - assert(e.message.contains("Temporary table") && e.message.contains("already exists")) + assert(e.message.contains("Temporary view") && e.message.contains("already exists")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala index 232c1beae7998..3e31d22e15c0e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala @@ -70,6 +70,7 @@ class UnsafeFixedWidthAggregationMapSuite TaskContext.setTaskContext(new TaskContextImpl( stageId = 0, + stageAttemptNumber = 0, partitionId = 0, taskAttemptId = Random.nextInt(10000), attemptNumber = 0, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeKVExternalSorterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeKVExternalSorterSuite.scala index 604502f2a57d0..6af9f8b77f8d3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeKVExternalSorterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeKVExternalSorterSuite.scala @@ -116,6 +116,7 @@ class UnsafeKVExternalSorterSuite extends SparkFunSuite with SharedSQLContext { val taskMemMgr = new TaskMemoryManager(memoryManager, 0) TaskContext.setTaskContext(new TaskContextImpl( stageId = 0, + stageAttemptNumber = 0, partitionId = 0, taskAttemptId = 98456, attemptNumber = 0, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala index dff88ce7f1b9a..a3ae93810aa3c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala @@ -114,7 +114,7 @@ class UnsafeRowSerializerSuite extends SparkFunSuite with LocalSparkContext { (i, converter(Row(i))) } val taskMemoryManager = new TaskMemoryManager(sc.env.memoryManager, 0) - val taskContext = new TaskContextImpl(0, 0, 0, 0, taskMemoryManager, new Properties, null) + val taskContext = new TaskContextImpl(0, 0, 0, 0, 0, taskMemoryManager, new Properties, 
null) val sorter = new ExternalSorter[Int, UnsafeRow, UnsafeRow]( taskContext, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationStoreSuite.scala index 10f1ee279bedf..3fad7dfddadcc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationStoreSuite.scala @@ -35,7 +35,8 @@ class SortBasedAggregationStoreSuite extends SparkFunSuite with LocalSparkConte val conf = new SparkConf() sc = new SparkContext("local[2, 4]", "test", conf) val taskManager = new TaskMemoryManager(new TestMemoryManager(conf), 0) - TaskContext.setTaskContext(new TaskContextImpl(0, 0, 0, 0, taskManager, new Properties, null)) + TaskContext.setTaskContext( + new TaskContextImpl(0, 0, 0, 0, 0, taskManager, new Properties, null)) } override def afterAll(): Unit = TaskContext.unset() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala index 57958f7239224..261df06100aef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala @@ -25,7 +25,7 @@ import java.util.Locale import com.google.common.io.Files import org.apache.arrow.memory.RootAllocator import org.apache.arrow.vector.{VectorLoader, VectorSchemaRoot} -import org.apache.arrow.vector.file.json.JsonFileReader +import org.apache.arrow.vector.ipc.JsonFileReader import org.apache.arrow.vector.util.Validator import org.scalatest.BeforeAndAfterAll @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext -import org.apache.spark.sql.types.{BinaryType, IntegerType, StructField, StructType} +import org.apache.spark.sql.types.{BinaryType, Decimal, IntegerType, StructField, StructType} import org.apache.spark.util.Utils @@ -76,16 +76,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 16 | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 16 - | } ] - | } + | "children" : [ ] | }, { | "name" : "b_s", | "type" : { @@ -94,16 +85,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 16 | }, | "nullable" : true, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 16 - | } ] - | } + | "children" : [ ] | } ] | }, | "batches" : [ { @@ -143,16 +125,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 32 | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } + | "children" : [ ] | }, { | "name" : "b_i", | "type" : { @@ -161,16 +134,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 32 | }, | "nullable" 
: true, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } + | "children" : [ ] | } ] | }, | "batches" : [ { @@ -210,16 +174,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 64 | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 64 - | } ] - | } + | "children" : [ ] | }, { | "name" : "b_l", | "type" : { @@ -228,16 +183,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 64 | }, | "nullable" : true, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 64 - | } ] - | } + | "children" : [ ] | } ] | }, | "batches" : [ { @@ -276,16 +222,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "precision" : "SINGLE" | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } + | "children" : [ ] | }, { | "name" : "b_f", | "type" : { @@ -293,16 +230,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "precision" : "SINGLE" | }, | "nullable" : true, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } + | "children" : [ ] | } ] | }, | "batches" : [ { @@ -341,16 +269,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "precision" : "DOUBLE" | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 64 - | } ] - | } + | "children" : [ ] | }, { | "name" : "b_d", | "type" : { @@ -358,16 +277,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "precision" : "DOUBLE" | }, | "nullable" : true, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 64 - | } ] - | } + | "children" : [ ] | } ] | }, | "batches" : [ { @@ -394,6 +304,70 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { collectAndValidate(df, json, "floating_point-double_precision.json") } + test("decimal conversion") { + val json = + s""" + |{ + | "schema" : { + | "fields" : [ { + | "name" : "a_d", + | "type" : { + | "name" : "decimal", + | "precision" : 38, + | "scale" : 18 + | }, + | "nullable" : true, + | "children" : [ ] + | }, { + | "name" : "b_d", + | "type" : { + | "name" : "decimal", + | "precision" : 38, + | "scale" : 18 + | }, + | "nullable" : true, + | "children" : [ ] + | } ] + | }, + | "batches" : [ { + | "count" : 7, + | "columns" : [ { + | "name" : "a_d", + | "count" : 7, + | "VALIDITY" : [ 1, 1, 1, 1, 1, 1, 1 ], + | "DATA" : [ + | "1000000000000000000", + | "2000000000000000000", + | "10000000000000000", + | "200000000000000000000", + | "100000000000000", + | "20000000000000000000000", + | "30000000000000000000" ] + | }, { + | "name" : "b_d", + | "count" : 7, + | "VALIDITY" : [ 1, 0, 0, 1, 0, 1, 0 ], + | "DATA" : [ + | 
"1100000000000000000", + | "0", + | "0", + | "2200000000000000000", + | "0", + | "3300000000000000000", + | "0" ] + | } ] + | } ] + |} + """.stripMargin + + val a_d = List(1.0, 2.0, 0.01, 200.0, 0.0001, 20000.0, 30.0).map(Decimal(_)) + val b_d = List(Some(Decimal(1.1)), None, None, Some(Decimal(2.2)), None, Some(Decimal(3.3)), + Some(Decimal("123456789012345678901234567890"))) + val df = a_d.zip(b_d).toDF("a_d", "b_d") + + collectAndValidate(df, json, "decimalData.json") + } + test("index conversion") { val data = List[Int](1, 2, 3, 4, 5, 6) val json = @@ -408,16 +382,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 32 | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } + | "children" : [ ] | } ] | }, | "batches" : [ { @@ -449,16 +414,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 16 | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 16 - | } ] - | } + | "children" : [ ] | }, { | "name" : "b", | "type" : { @@ -466,16 +422,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "precision" : "SINGLE" | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } + | "children" : [ ] | }, { | "name" : "c", | "type" : { @@ -484,16 +431,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 32 | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } + | "children" : [ ] | }, { | "name" : "d", | "type" : { @@ -501,16 +439,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "precision" : "DOUBLE" | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 64 - | } ] - | } + | "children" : [ ] | }, { | "name" : "e", | "type" : { @@ -519,16 +448,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 64 | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 64 - | } ] - | } + | "children" : [ ] | } ] | }, | "batches" : [ { @@ -583,57 +503,21 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "name" : "utf8" | }, | "nullable" : true, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "OFFSET", - | "typeBitWidth" : 32 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 8 - | } ] - | } + | "children" : [ ] | }, { | "name" : "lower_case", | "type" : { | "name" : "utf8" | }, | "nullable" : true, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "OFFSET", - | "typeBitWidth" : 32 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 8 - | } ] - | } 
+ | "children" : [ ] | }, { | "name" : "null_str", | "type" : { | "name" : "utf8" | }, | "nullable" : true, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "OFFSET", - | "typeBitWidth" : 32 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 8 - | } ] - | } + | "children" : [ ] | } ] | }, | "batches" : [ { @@ -681,16 +565,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "name" : "bool" | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 1 - | } ] - | } + | "children" : [ ] | } ] | }, | "batches" : [ { @@ -721,16 +596,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 8 | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 8 - | } ] - | } + | "children" : [ ] | } ] | }, | "batches" : [ { @@ -760,19 +626,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "name" : "binary" | }, | "nullable" : true, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "OFFSET", - | "typeBitWidth" : 32 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 8 - | } ] - | } + | "children" : [ ] | } ] | }, | "batches" : [ { @@ -807,16 +661,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "unit" : "DAY" | }, | "nullable" : true, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } + | "children" : [ ] | } ] | }, | "batches" : [ { @@ -855,16 +700,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "timezone" : "America/Los_Angeles" | }, | "nullable" : true, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 64 - | } ] - | } + | "children" : [ ] | } ] | }, | "batches" : [ { @@ -904,16 +740,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "precision" : "SINGLE" | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } + | "children" : [ ] | }, { | "name" : "NaN_d", | "type" : { @@ -921,16 +748,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "precision" : "DOUBLE" | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 64 - | } ] - | } + | "children" : [ ] | } ] | }, | "batches" : [ { @@ -939,12 +757,12 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "name" : "NaN_f", | "count" : 2, | "VALIDITY" : [ 1, 1 ], - | "DATA" : [ 1.2000000476837158, "NaN" ] + | "DATA" : [ 1.2000000476837158, NaN ] | }, { | "name" : "NaN_d", | "count" : 2, | "VALIDITY" : [ 1, 1 ], - | "DATA" : [ "NaN", 1.2 ] + | "DATA" : [ NaN, 1.2 ] | } ] | } ] |} @@ -976,26 +794,8 @@ class ArrowConvertersSuite extends SharedSQLContext with 
BeforeAndAfterAll { | "bitWidth" : 32, | "isSigned" : true | }, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } - | } ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "OFFSET", - | "typeBitWidth" : 32 - | } ] - | } + | "children" : [ ] + | } ] | }, { | "name" : "b_arr", | "nullable" : true, @@ -1010,26 +810,8 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 32, | "isSigned" : true | }, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } - | } ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "OFFSET", - | "typeBitWidth" : 32 - | } ] - | } + | "children" : [ ] + | } ] | }, { | "name" : "c_arr", | "nullable" : true, @@ -1044,26 +826,8 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 32, | "isSigned" : true | }, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } - | } ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "OFFSET", - | "typeBitWidth" : 32 - | } ] - | } + | "children" : [ ] + | } ] | }, { | "name" : "d_arr", | "nullable" : true, @@ -1084,36 +848,9 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 32, | "isSigned" : true | }, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } - | } ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "OFFSET", - | "typeBitWidth" : 32 - | } ] - | } - | } ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "OFFSET", - | "typeBitWidth" : 32 + | "children" : [ ] | } ] - | } + | } ] | } ] | }, | "batches" : [ { @@ -1204,23 +941,8 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 32, | "isSigned" : true | }, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } - | } ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | } ] - | } + | "children" : [ ] + | } ] | }, { | "name" : "b_struct", | "nullable" : true, @@ -1235,23 +957,8 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 32, | "isSigned" : true | }, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } - | } ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | } ] - | } + | "children" : [ ] + | } ] | }, { | "name" : "c_struct", | "nullable" : false, @@ -1266,23 +973,8 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 32, | "isSigned" : true | }, - | "children" : [ ], - | "typeLayout" : { - | 
"vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } - | } ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | } ] - | } + | "children" : [ ] + | } ] | }, { | "name" : "d_struct", | "nullable" : true, @@ -1303,30 +995,9 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 32, | "isSigned" : true | }, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } - | } ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | } ] - | } - | } ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 + | "children" : [ ] | } ] - | } + | } ] | } ] | }, | "batches" : [ { @@ -1413,16 +1084,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 32 | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } + | "children" : [ ] | }, { | "name" : "b", | "type" : { @@ -1431,16 +1093,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 32 | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } + | "children" : [ ] | } ] | }, | "batches" : [ { @@ -1471,16 +1124,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 32 | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } + | "children" : [ ] | }, { | "name" : "b", | "type" : { @@ -1489,16 +1133,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 32 | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } + | "children" : [ ] | } ] | }, | "batches" : [ { @@ -1582,7 +1217,6 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { assert(msg.getCause.getClass === classOf[UnsupportedOperationException]) } - runUnsupported { decimalData.toArrowPayload.collect() } runUnsupported { mapData.toDF().toArrowPayload.collect() } runUnsupported { complexData.toArrowPayload.collect() } } @@ -1600,16 +1234,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 32 | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } + | "children" : [ ] | }, { | "name" : "b_i", | "type" : { @@ -1618,16 +1243,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { | "bitWidth" : 32 | }, | "nullable" : true, - | "children" : [ ], - | "typeLayout" : { - | "vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - | }, { - | "type" : "DATA", - | "typeBitWidth" : 32 - | } ] - | } + | "children" : [ ] | } ] | }, | "batches" : [ { @@ -1658,16 
+1274,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll {
         |      "bitWidth" : 32
         |    },
         |    "nullable" : true,
-        |    "children" : [ ],
-        |    "typeLayout" : {
-        |      "vectors" : [ {
-        |        "type" : "VALIDITY",
-        |        "typeBitWidth" : 1
-        |      }, {
-        |        "type" : "DATA",
-        |        "typeBitWidth" : 32
-        |      } ]
-        |    }
+        |    "children" : [ ]
         |  }, {
         |    "name" : "a_i",
         |    "type" : {
@@ -1676,16 +1283,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll {
         |      "bitWidth" : 32
         |    },
         |    "nullable" : false,
-        |    "children" : [ ],
-        |    "typeLayout" : {
-        |      "vectors" : [ {
-        |        "type" : "VALIDITY",
-        |        "typeBitWidth" : 1
-        |      }, {
-        |        "type" : "DATA",
-        |        "typeBitWidth" : 32
-        |      } ]
-        |    }
+        |    "children" : [ ]
         |  } ]
         |},
         |"batches" : [ {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala
index a71e30aa3ca96..c42bc60a59d67 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala
@@ -20,8 +20,8 @@ package org.apache.spark.sql.execution.arrow
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.util.ArrayData
-import org.apache.spark.sql.execution.vectorized.ArrowColumnVector
 import org.apache.spark.sql.types._
+import org.apache.spark.sql.vectorized.ArrowColumnVector
 import org.apache.spark.unsafe.types.UTF8String
 class ArrowWriterSuite extends SparkFunSuite {
@@ -49,6 +49,7 @@ class ArrowWriterSuite extends SparkFunSuite {
       case LongType => reader.getLong(rowId)
       case FloatType => reader.getFloat(rowId)
       case DoubleType => reader.getDouble(rowId)
+      case DecimalType.Fixed(precision, scale) => reader.getDecimal(rowId, precision, scale)
       case StringType => reader.getUTF8String(rowId)
       case BinaryType => reader.getBinary(rowId)
       case DateType => reader.getInt(rowId)
@@ -66,6 +67,7 @@ class ArrowWriterSuite extends SparkFunSuite {
     check(LongType, Seq(1L, 2L, null, 4L))
     check(FloatType, Seq(1.0f, 2.0f, null, 4.0f))
     check(DoubleType, Seq(1.0d, 2.0d, null, 4.0d))
+    check(DecimalType.SYSTEM_DEFAULT, Seq(Decimal(1), Decimal(2), null, Decimal(4)))
     check(StringType, Seq("a", "b", null, "d").map(UTF8String.fromString))
     check(BinaryType, Seq("a".getBytes(), "b".getBytes(), null, "d".getBytes()))
     check(DateType, Seq(0, 1, 2, null, 4))
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MiscBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MiscBenchmark.scala
index 01773c238b0db..f039aeaad442c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MiscBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MiscBenchmark.scala
@@ -202,6 +202,42 @@ class MiscBenchmark extends BenchmarkBase {
     generate inline array wholestage off     6901 / 6928          2.4         411.3       1.0X
     generate inline array wholestage on      1001 / 1010         16.8          59.7       6.9X
     */
+
+    val M = 60000
+    runBenchmark("generate big struct array", M) {
+      import sparkSession.implicits._
+      val df = sparkSession.sparkContext.parallelize(Seq(("1",
+        Array.fill(M)({
+          val i = math.random
+          (i.toString, (i + 1).toString, (i + 2).toString, (i + 3).toString)
+        })))).toDF("col", "arr")
+
+      df.selectExpr("*", "explode(arr) as arr_col")
+        .select("col", "arr_col.*").count
+    }
+
+    /*
+    Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz + + Test the impact of adding the optimization of Generate.unrequiredChildIndex: + we see an enormous ~250x improvement in this case, and the gap grows as O(n^2). + + with Optimization ON: + + generate big struct array: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + generate big struct array wholestage off 331 / 378 0.2 5524.9 1.0X + generate big struct array wholestage on 205 / 232 0.3 3413.1 1.6X + + with Optimization OFF: + + generate big struct array: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + generate big struct array wholestage off 49697 / 51496 0.0 828277.7 1.0X + generate big struct array wholestage on 50558 / 51434 0.0 842641.6 1.0X + */ + } ignore("generate regular generator") { @@ -227,4 +263,5 @@ class MiscBenchmark extends BenchmarkBase { generate stack wholestage on 836 / 847 20.1 49.8 15.5X */ } + } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala index e662e294228db..295943e418e82 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.test.SQLTestData._ import org.apache.spark.sql.types._ import org.apache.spark.storage.StorageLevel._ +import org.apache.spark.util.Utils class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { import testImplicits._ @@ -40,7 +41,8 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { data.createOrReplaceTempView(s"testData$dataType") val storageLevel = MEMORY_ONLY val plan = spark.sessionState.executePlan(data.logicalPlan).sparkPlan - val inMemoryRelation = InMemoryRelation(useCompression = true, 5, storageLevel, plan, None) + val inMemoryRelation = InMemoryRelation(useCompression = true, 5, storageLevel, plan, None, + data.logicalPlan.stats) assert(inMemoryRelation.cachedColumnBuffers.getStorageLevel == storageLevel) inMemoryRelation.cachedColumnBuffers.collect().head match { @@ -116,7 +118,8 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { test("simple columnar query") { val plan = spark.sessionState.executePlan(testData.logicalPlan).sparkPlan - val scan = InMemoryRelation(useCompression = true, 5, MEMORY_ONLY, plan, None) + val scan = InMemoryRelation(useCompression = true, 5, MEMORY_ONLY, plan, None, + testData.logicalPlan.stats) checkAnswer(scan, testData.collect().toSeq) } @@ -132,8 +135,10 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { } test("projection") { - val plan = spark.sessionState.executePlan(testData.select('value, 'key).logicalPlan).sparkPlan - val scan = InMemoryRelation(useCompression = true, 5, MEMORY_ONLY, plan, None) + val logicalPlan = testData.select('value, 'key).logicalPlan + val plan = spark.sessionState.executePlan(logicalPlan).sparkPlan + val scan = InMemoryRelation(useCompression = true, 5, MEMORY_ONLY, plan, None, + logicalPlan.stats) checkAnswer(scan, testData.collect().map { case Row(key: Int, value: String) => value -> key @@ -149,7 +154,8 @@ class
InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { test("SPARK-1436 regression: in-memory columns must be able to be accessed multiple times") { val plan = spark.sessionState.executePlan(testData.logicalPlan).sparkPlan - val scan = InMemoryRelation(useCompression = true, 5, MEMORY_ONLY, plan, None) + val scan = InMemoryRelation(useCompression = true, 5, MEMORY_ONLY, plan, None, + testData.logicalPlan.stats) checkAnswer(scan, testData.collect().toSeq) checkAnswer(scan, testData.collect().toSeq) @@ -323,7 +329,7 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { test("SPARK-17549: cached table size should be correctly calculated") { val data = spark.sparkContext.parallelize(1 to 10, 5).toDF() val plan = spark.sessionState.executePlan(data.logicalPlan).sparkPlan - val cached = InMemoryRelation(true, 5, MEMORY_ONLY, plan, None) + val cached = InMemoryRelation(true, 5, MEMORY_ONLY, plan, None, data.logicalPlan.stats) // Materialize the data. val expectedAnswer = data.collect() @@ -448,8 +454,8 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { test("SPARK-22249: buildFilter should not throw exception when In contains an empty list") { val attribute = AttributeReference("a", IntegerType)() - val testRelation = InMemoryRelation(false, 1, MEMORY_ONLY, - LocalTableScanExec(Seq(attribute), Nil), None) + val localTableScanExec = LocalTableScanExec(Seq(attribute), Nil) + val testRelation = InMemoryRelation(false, 1, MEMORY_ONLY, localTableScanExec, None, null) val tableScanExec = InMemoryTableScanExec(Seq(attribute), Seq(In(attribute, Nil)), testRelation) assert(tableScanExec.partitionFilters.isEmpty) @@ -479,4 +485,43 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { } } } + + test("SPARK-22673: InMemoryRelation should utilize existing stats of the plan to be cached") { + withSQLConf("spark.sql.cbo.enabled" -> "true") { + withTempPath { workDir => + withTable("table1") { + val workDirPath = workDir.getAbsolutePath + val data = Seq(100, 200, 300, 400).toDF("count") + data.write.parquet(workDirPath) + val dfFromFile = spark.read.parquet(workDirPath).cache() + val inMemoryRelation = dfFromFile.queryExecution.optimizedPlan.collect { + case plan: InMemoryRelation => plan + }.head + // InMemoryRelation's stats are the file size before the underlying RDD is materialized + assert(inMemoryRelation.computeStats().sizeInBytes === 700) + + // InMemoryRelation's stats are updated after materializing the RDD + dfFromFile.collect() + assert(inMemoryRelation.computeStats().sizeInBytes === 16) + + // test of catalog table + val dfFromTable = spark.catalog.createTable("table1", workDirPath).cache() + val inMemoryRelation2 = dfFromTable.queryExecution.optimizedPlan. + collect { case plan: InMemoryRelation => plan }.head + + // Even with CBO enabled, InMemoryRelation's stats stay at the file size until the table's + // stats are calculated + assert(inMemoryRelation2.computeStats().sizeInBytes === 700) + + // InMemoryRelation's stats should be updated after calculating stats of the table + // clear cache to simulate a fresh environment + dfFromTable.unpersist(blocking = true) + spark.sql("ANALYZE TABLE table1 COMPUTE STATISTICS") + val inMemoryRelation3 = spark.read.table("table1").cache().queryExecution.optimizedPlan.
+ collect { case plan: InMemoryRelation => plan }.head + assert(inMemoryRelation3.computeStats().sizeInBytes === 48) + } + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CommandUtilsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CommandUtilsSuite.scala new file mode 100644 index 0000000000000..f3e15189a6418 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CommandUtilsSuite.scala @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.catalog.CatalogStatistics + +class CommandUtilsSuite extends SparkFunSuite { + + test("Check if compareAndGetNewStats returns correct results") { + val oldStats1 = CatalogStatistics(sizeInBytes = 10, rowCount = Some(100)) + val newStats1 = CommandUtils.compareAndGetNewStats( + Some(oldStats1), newTotalSize = 10, newRowCount = Some(100)) + assert(newStats1.isEmpty) + val newStats2 = CommandUtils.compareAndGetNewStats( + Some(oldStats1), newTotalSize = -1, newRowCount = None) + assert(newStats2.isEmpty) + val newStats3 = CommandUtils.compareAndGetNewStats( + Some(oldStats1), newTotalSize = 20, newRowCount = Some(-1)) + assert(newStats3.isDefined) + newStats3.foreach { stat => + assert(stat.sizeInBytes === 20) + assert(stat.rowCount.isEmpty) + } + val newStats4 = CommandUtils.compareAndGetNewStats( + Some(oldStats1), newTotalSize = -1, newRowCount = Some(200)) + assert(newStats4.isDefined) + newStats4.foreach { stat => + assert(stat.sizeInBytes === 10) + assert(stat.rowCount.isDefined && stat.rowCount.get === 200) + } + } + + test("Check if compareAndGetNewStats can handle large values") { + // Tests for large values + val oldStats2 = CatalogStatistics(sizeInBytes = BigInt(Long.MaxValue) * 2) + val newStats5 = CommandUtils.compareAndGetNewStats( + Some(oldStats2), newTotalSize = BigInt(Long.MaxValue) * 2, None) + assert(newStats5.isEmpty) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala index eb7c33590b602..2b1aea08b1223 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala @@ -54,6 +54,13 @@ class DDLParserSuite extends PlanTest with SharedSQLContext { } } + private def intercept(sqlCommand: String, messages: String*): Unit = { + val e = intercept[ParseException](parser.parsePlan(sqlCommand)).getMessage + messages.foreach { message => + assert(e.contains(message)) + } + } + private def parseAs[T: 
ClassTag](query: String): T = { parser.parsePlan(query) match { case t: T => t @@ -494,6 +501,37 @@ class DDLParserSuite extends PlanTest with SharedSQLContext { } } + test("Duplicate clauses - create table") { + def createTableHeader(duplicateClause: String, isNative: Boolean): String = { + val fileFormat = if (isNative) "USING parquet" else "STORED AS parquet" + s"CREATE TABLE my_tab(a INT, b STRING) $fileFormat $duplicateClause $duplicateClause" + } + + Seq(true, false).foreach { isNative => + intercept(createTableHeader("TBLPROPERTIES('test' = 'test2')", isNative), + "Found duplicate clauses: TBLPROPERTIES") + intercept(createTableHeader("LOCATION '/tmp/file'", isNative), + "Found duplicate clauses: LOCATION") + intercept(createTableHeader("COMMENT 'a table'", isNative), + "Found duplicate clauses: COMMENT") + intercept(createTableHeader("CLUSTERED BY(b) INTO 256 BUCKETS", isNative), + "Found duplicate clauses: CLUSTERED BY") + } + + // Only for native data source tables + intercept(createTableHeader("PARTITIONED BY (b)", isNative = true), + "Found duplicate clauses: PARTITIONED BY") + + // Only for Hive serde tables + intercept(createTableHeader("PARTITIONED BY (k int)", isNative = false), + "Found duplicate clauses: PARTITIONED BY") + intercept(createTableHeader("STORED AS parquet", isNative = false), + "Found duplicate clauses: STORED AS/BY") + intercept( + createTableHeader("ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe'", isNative = false), + "Found duplicate clauses: ROW FORMAT") + } + test("create table - with location") { val v1 = "CREATE TABLE my_tab(a INT, b STRING) USING parquet LOCATION '/tmp/file'" @@ -1153,38 +1191,119 @@ class DDLParserSuite extends PlanTest with SharedSQLContext { } } + test("Test CTAS against data source tables") { + val s1 = + """ + |CREATE TABLE IF NOT EXISTS mydb.page_view + |USING parquet + |COMMENT 'This is the staging page view table' + |LOCATION '/user/external/page_view' + |TBLPROPERTIES ('p1'='v1', 'p2'='v2') + |AS SELECT * FROM src + """.stripMargin + + val s2 = + """ + |CREATE TABLE IF NOT EXISTS mydb.page_view + |USING parquet + |LOCATION '/user/external/page_view' + |COMMENT 'This is the staging page view table' + |TBLPROPERTIES ('p1'='v1', 'p2'='v2') + |AS SELECT * FROM src + """.stripMargin + + val s3 = + """ + |CREATE TABLE IF NOT EXISTS mydb.page_view + |USING parquet + |COMMENT 'This is the staging page view table' + |LOCATION '/user/external/page_view' + |TBLPROPERTIES ('p1'='v1', 'p2'='v2') + |AS SELECT * FROM src + """.stripMargin + + checkParsing(s1) + checkParsing(s2) + checkParsing(s3) + + def checkParsing(sql: String): Unit = { + val (desc, exists) = extractTableDesc(sql) + assert(exists) + assert(desc.identifier.database == Some("mydb")) + assert(desc.identifier.table == "page_view") + assert(desc.storage.locationUri == Some(new URI("/user/external/page_view"))) + assert(desc.schema.isEmpty) // will be populated later when the table is actually created + assert(desc.comment == Some("This is the staging page view table")) + assert(desc.viewText.isEmpty) + assert(desc.viewDefaultDatabase.isEmpty) + assert(desc.viewQueryColumnNames.isEmpty) + assert(desc.partitionColumnNames.isEmpty) + assert(desc.provider == Some("parquet")) + assert(desc.properties == Map("p1" -> "v1", "p2" -> "v2")) + } + } + test("Test CTAS #1") { val s1 = - """CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + """ + |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view |COMMENT 'This is the staging page view table' |STORED AS RCFILE |LOCATION 
'/user/external/page_view' |TBLPROPERTIES ('p1'='v1', 'p2'='v2') - |AS SELECT * FROM src""".stripMargin + |AS SELECT * FROM src + """.stripMargin - val (desc, exists) = extractTableDesc(s1) - assert(exists) - assert(desc.identifier.database == Some("mydb")) - assert(desc.identifier.table == "page_view") - assert(desc.tableType == CatalogTableType.EXTERNAL) - assert(desc.storage.locationUri == Some(new URI("/user/external/page_view"))) - assert(desc.schema.isEmpty) // will be populated later when the table is actually created - assert(desc.comment == Some("This is the staging page view table")) - // TODO will be SQLText - assert(desc.viewText.isEmpty) - assert(desc.viewDefaultDatabase.isEmpty) - assert(desc.viewQueryColumnNames.isEmpty) - assert(desc.partitionColumnNames.isEmpty) - assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat")) - assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileOutputFormat")) - assert(desc.storage.serde == - Some("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe")) - assert(desc.properties == Map("p1" -> "v1", "p2" -> "v2")) + val s2 = + """ + |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + |STORED AS RCFILE + |COMMENT 'This is the staging page view table' + |TBLPROPERTIES ('p1'='v1', 'p2'='v2') + |LOCATION '/user/external/page_view' + |AS SELECT * FROM src + """.stripMargin + + val s3 = + """ + |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + |TBLPROPERTIES ('p1'='v1', 'p2'='v2') + |LOCATION '/user/external/page_view' + |STORED AS RCFILE + |COMMENT 'This is the staging page view table' + |AS SELECT * FROM src + """.stripMargin + + checkParsing(s1) + checkParsing(s2) + checkParsing(s3) + + def checkParsing(sql: String): Unit = { + val (desc, exists) = extractTableDesc(sql) + assert(exists) + assert(desc.identifier.database == Some("mydb")) + assert(desc.identifier.table == "page_view") + assert(desc.tableType == CatalogTableType.EXTERNAL) + assert(desc.storage.locationUri == Some(new URI("/user/external/page_view"))) + assert(desc.schema.isEmpty) // will be populated later when the table is actually created + assert(desc.comment == Some("This is the staging page view table")) + // TODO will be SQLText + assert(desc.viewText.isEmpty) + assert(desc.viewDefaultDatabase.isEmpty) + assert(desc.viewQueryColumnNames.isEmpty) + assert(desc.partitionColumnNames.isEmpty) + assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat")) + assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileOutputFormat")) + assert(desc.storage.serde == + Some("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe")) + assert(desc.properties == Map("p1" -> "v1", "p2" -> "v2")) + } } test("Test CTAS #2") { - val s2 = - """CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + val s1 = + """ + |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view |COMMENT 'This is the staging page view table' |ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe' | STORED AS @@ -1192,26 +1311,45 @@ class DDLParserSuite extends PlanTest with SharedSQLContext { | OUTPUTFORMAT 'parquet.hive.DeprecatedParquetOutputFormat' |LOCATION '/user/external/page_view' |TBLPROPERTIES ('p1'='v1', 'p2'='v2') - |AS SELECT * FROM src""".stripMargin + |AS SELECT * FROM src + """.stripMargin - val (desc, exists) = extractTableDesc(s2) - assert(exists) - assert(desc.identifier.database == Some("mydb")) - assert(desc.identifier.table == "page_view") - assert(desc.tableType == 
CatalogTableType.EXTERNAL) - assert(desc.storage.locationUri == Some(new URI("/user/external/page_view"))) - assert(desc.schema.isEmpty) // will be populated later when the table is actually created - // TODO will be SQLText - assert(desc.comment == Some("This is the staging page view table")) - assert(desc.viewText.isEmpty) - assert(desc.viewDefaultDatabase.isEmpty) - assert(desc.viewQueryColumnNames.isEmpty) - assert(desc.partitionColumnNames.isEmpty) - assert(desc.storage.properties == Map()) - assert(desc.storage.inputFormat == Some("parquet.hive.DeprecatedParquetInputFormat")) - assert(desc.storage.outputFormat == Some("parquet.hive.DeprecatedParquetOutputFormat")) - assert(desc.storage.serde == Some("parquet.hive.serde.ParquetHiveSerDe")) - assert(desc.properties == Map("p1" -> "v1", "p2" -> "v2")) + val s2 = + """ + |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + |LOCATION '/user/external/page_view' + |TBLPROPERTIES ('p1'='v1', 'p2'='v2') + |ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe' + | STORED AS + | INPUTFORMAT 'parquet.hive.DeprecatedParquetInputFormat' + | OUTPUTFORMAT 'parquet.hive.DeprecatedParquetOutputFormat' + |COMMENT 'This is the staging page view table' + |AS SELECT * FROM src + """.stripMargin + + checkParsing(s1) + checkParsing(s2) + + def checkParsing(sql: String): Unit = { + val (desc, exists) = extractTableDesc(sql) + assert(exists) + assert(desc.identifier.database == Some("mydb")) + assert(desc.identifier.table == "page_view") + assert(desc.tableType == CatalogTableType.EXTERNAL) + assert(desc.storage.locationUri == Some(new URI("/user/external/page_view"))) + assert(desc.schema.isEmpty) // will be populated later when the table is actually created + // TODO will be SQLText + assert(desc.comment == Some("This is the staging page view table")) + assert(desc.viewText.isEmpty) + assert(desc.viewDefaultDatabase.isEmpty) + assert(desc.viewQueryColumnNames.isEmpty) + assert(desc.partitionColumnNames.isEmpty) + assert(desc.storage.properties == Map()) + assert(desc.storage.inputFormat == Some("parquet.hive.DeprecatedParquetInputFormat")) + assert(desc.storage.outputFormat == Some("parquet.hive.DeprecatedParquetOutputFormat")) + assert(desc.storage.serde == Some("parquet.hive.serde.ParquetHiveSerDe")) + assert(desc.properties == Map("p1" -> "v1", "p2" -> "v2")) + } } test("Test CTAS #3") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 878f435c75cb3..6ca21b5aa1595 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -117,6 +117,21 @@ class InMemoryCatalogedDDLSuite extends DDLSuite with SharedSQLContext with Befo } } + test("SPARK-22431: table with nested type col with special char") { + withTable("t") { + spark.sql("CREATE TABLE t(q STRUCT<`$a`:INT, col2:STRING>, i1 INT) USING PARQUET") + checkAnswer(spark.table("t"), Nil) + } + } + + test("SPARK-22431: view with nested type") { + withView("t", "v") { + spark.sql("CREATE VIEW t AS SELECT STRUCT('a' AS `$a`, 1 AS b) q") + checkAnswer(spark.table("t"), Row(Row("a", 1)) :: Nil) + spark.sql("CREATE VIEW v AS SELECT STRUCT('a' AS `a`, 1 AS b) q") + checkAnswer(spark.table("v"), Row(Row("a", 1)) :: Nil) + } + } } abstract class DDLSuite extends QueryTest with SQLTestUtils { @@ -820,6 +835,31 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } }
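A minimal standalone sketch of the nested-type round trip that the SPARK-22431 tests above exercise; the local SparkSession named `spark`, the table name, and the inserted values are assumptions for illustration, not part of the diff:

// A struct field whose name needs backquoting (here `$a`) survives a
// CREATE TABLE, INSERT, and SELECT round trip with the native Parquet source.
spark.sql("CREATE TABLE nested_t(q STRUCT<`$a`: INT, col2: STRING>, i1 INT) USING PARQUET")
spark.sql("INSERT INTO nested_t SELECT named_struct('$a', 1, 'col2', 'x'), 2")
spark.sql("SELECT q.`$a`, q.col2, i1 FROM nested_t").show()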
+ test("rename temporary view - destination table with database name,with:CREATE TEMPORARY view") { + withTempView("view1") { + sql( + """ + |CREATE TEMPORARY VIEW view1 + |USING org.apache.spark.sql.sources.DDLScanSource + |OPTIONS ( + | From '1', + | To '10', + | Table 'test1' + |) + """.stripMargin) + + val e = intercept[AnalysisException] { + sql("ALTER TABLE view1 RENAME TO default.tab2") + } + assert(e.getMessage.contains( + "RENAME TEMPORARY VIEW from '`view1`' to '`default`.`tab2`': " + + "cannot specify database name 'default' in the destination table")) + + val catalog = spark.sessionState.catalog + assert(catalog.listTables("default") == Seq(TableIdentifier("view1"))) + } + } + test("rename temporary view") { withTempView("tab1", "tab2") { spark.range(10).createOrReplaceTempView("tab1") @@ -868,6 +908,42 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } + test("rename temporary view - destination table already exists, with: CREATE TEMPORARY view") { + withTempView("view1", "view2") { + sql( + """ + |CREATE TEMPORARY VIEW view1 + |USING org.apache.spark.sql.sources.DDLScanSource + |OPTIONS ( + | From '1', + | To '10', + | Table 'test1' + |) + """.stripMargin) + + sql( + """ + |CREATE TEMPORARY VIEW view2 + |USING org.apache.spark.sql.sources.DDLScanSource + |OPTIONS ( + | From '1', + | To '10', + | Table 'test1' + |) + """.stripMargin) + + val e = intercept[AnalysisException] { + sql("ALTER TABLE view1 RENAME TO view2") + } + assert(e.getMessage.contains( + "RENAME TEMPORARY VIEW from '`view1`' to '`view2`': destination table already exists")) + + val catalog = spark.sessionState.catalog + assert(catalog.listTables("default") == + Seq(TableIdentifier("view1"), TableIdentifier("view2"))) + } + } + test("alter table: bucketing is not supported") { val catalog = spark.sessionState.catalog val tableIdent = TableIdentifier("tab1", Some("dbx")) @@ -976,6 +1052,10 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { sql("SHOW DATABASES LIKE '*db1A'"), Row("showdb1a") :: Nil) + checkAnswer( + sql("SHOW DATABASES '*db1A'"), + Row("showdb1a") :: Nil) + checkAnswer( sql("SHOW DATABASES LIKE 'showdb1A'"), Row("showdb1a") :: Nil) @@ -1709,12 +1789,22 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } test("block creating duplicate temp table") { - withView("t_temp") { + withTempView("t_temp") { sql("CREATE TEMPORARY VIEW t_temp AS SELECT 1, 2") val e = intercept[TempTableAlreadyExistsException] { sql("CREATE TEMPORARY TABLE t_temp (c3 int, c4 string) USING JSON") }.getMessage - assert(e.contains("Temporary table 't_temp' already exists")) + assert(e.contains("Temporary view 't_temp' already exists")) + } + } + + test("block creating duplicate temp view") { + withTempView("t_temp") { + sql("CREATE TEMPORARY VIEW t_temp AS SELECT 1, 2") + val e = intercept[TempTableAlreadyExistsException] { + sql("CREATE TEMPORARY VIEW t_temp (c3 int, c4 string) USING JSON") + }.getMessage + assert(e.contains("Temporary view 't_temp' already exists")) } } @@ -1956,8 +2046,8 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { s""" |CREATE TABLE t(a int, b int, c int, d int) |USING parquet - |PARTITIONED BY(a, b) |LOCATION "${dir.toURI}" + |PARTITIONED BY(a, b) """.stripMargin) spark.sql("INSERT INTO TABLE t PARTITION(a=1, b=2) SELECT 3, 4") checkAnswer(spark.table("t"), Row(3, 4, 1, 2) :: Nil) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelationSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelationSuite.scala index caf03885e3873..c1f2c18d1417d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelationSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.datasources import java.io.{File, FilenameFilter} import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, SortMergeJoinExec} import org.apache.spark.sql.test.SharedSQLContext class HadoopFsRelationSuite extends QueryTest with SharedSQLContext { @@ -39,4 +40,44 @@ class HadoopFsRelationSuite extends QueryTest with SharedSQLContext { assert(df.queryExecution.logical.stats.sizeInBytes === BigInt(totalSize)) } } + + test("SPARK-22790: spark.sql.sources.compressionFactor takes effect") { + import testImplicits._ + Seq(1.0, 0.5).foreach { compressionFactor => + withSQLConf("spark.sql.sources.fileCompressionFactor" -> compressionFactor.toString, + "spark.sql.autoBroadcastJoinThreshold" -> "400") { + withTempPath { workDir => + // the file size is 740 bytes + val workDirPath = workDir.getAbsolutePath + val data1 = Seq(100, 200, 300, 400).toDF("count") + data1.write.parquet(workDirPath + "/data1") + val df1FromFile = spark.read.parquet(workDirPath + "/data1") + val data2 = Seq(100, 200, 300, 400).toDF("count") + data2.write.parquet(workDirPath + "/data2") + val df2FromFile = spark.read.parquet(workDirPath + "/data2") + val joinedDF = df1FromFile.join(df2FromFile, Seq("count")) + if (compressionFactor == 0.5) { + val bJoinExec = joinedDF.queryExecution.executedPlan.collect { + case bJoin: BroadcastHashJoinExec => bJoin + } + assert(bJoinExec.nonEmpty) + val smJoinExec = joinedDF.queryExecution.executedPlan.collect { + case smJoin: SortMergeJoinExec => smJoin + } + assert(smJoinExec.isEmpty) + } else { + // compressionFactor is 1.0 + val bJoinExec = joinedDF.queryExecution.executedPlan.collect { + case bJoin: BroadcastHashJoinExec => bJoin + } + assert(bJoinExec.isEmpty) + val smJoinExec = joinedDF.queryExecution.executedPlan.collect { + case smJoin: SortMergeJoinExec => smJoin + } + assert(smJoinExec.nonEmpty) + } + } + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index e439699605abb..4398e547d9217 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -482,19 +482,53 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { } } + test("save csv with quote escaping, using charToEscapeQuoteEscaping option") { + withTempPath { path => + + // original text + val df1 = Seq( + """You are "beautiful"""", + """Yes, \"in the inside"\""" + ).toDF() + + // text written in CSV with following options: + // quote character: " + // escape character: \ + // character to escape quote escaping: # + val df2 = Seq( + """"You are \"beautiful\""""", + """"Yes, #\\"in the inside\"#\"""" + ).toDF() + + df2.coalesce(1).write.text(path.getAbsolutePath) + + val df3 = spark.read + .format("csv") + .option("quote", "\"") + .option("escape", "\\") + .option("charToEscapeQuoteEscaping", "#") + .load(path.getAbsolutePath) + + checkAnswer(df1, df3) + } + } + test("commented lines in 
CSV data") { - val results = spark.read - .format("csv") - .options(Map("comment" -> "~", "header" -> "false")) - .load(testFile(commentsFile)) - .collect() + Seq("false", "true").foreach { multiLine => - val expected = - Seq(Seq("1", "2", "3", "4", "5.01", "2015-08-20 15:57:00"), - Seq("6", "7", "8", "9", "0", "2015-08-21 16:58:01"), - Seq("1", "2", "3", "4", "5", "2015-08-23 18:00:42")) + val results = spark.read + .format("csv") + .options(Map("comment" -> "~", "header" -> "false", "multiLine" -> multiLine)) + .load(testFile(commentsFile)) + .collect() - assert(results.toSeq.map(_.toSeq) === expected) + val expected = + Seq(Seq("1", "2", "3", "4", "5.01", "2015-08-20 15:57:00"), + Seq("6", "7", "8", "9", "0", "2015-08-21 16:58:01"), + Seq("1", "2", "3", "4", "5", "2015-08-23 18:00:42")) + + assert(results.toSeq.map(_.toSeq) === expected) + } } test("inferring schema with commented lines in CSV data") { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala similarity index 81% rename from sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala rename to sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala index de6f0d67f1734..8680b86517b19 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala @@ -15,25 +15,32 @@ * limitations under the License. */ -package org.apache.spark.sql.hive.orc +package org.apache.spark.sql.execution.datasources.orc import java.nio.charset.StandardCharsets import java.sql.{Date, Timestamp} import scala.collection.JavaConverters._ -import org.apache.hadoop.hive.ql.io.sarg.{PredicateLeaf, SearchArgument} +import org.apache.orc.storage.ql.io.sarg.{PredicateLeaf, SearchArgument} -import org.apache.spark.sql.{Column, DataFrame, QueryTest} +import org.apache.spark.sql.{Column, DataFrame} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, HadoopFsRelation, LogicalRelation} +import org.apache.spark.sql.test.SharedSQLContext +import org.apache.spark.sql.types._ /** - * A test suite that tests ORC filter API based filter pushdown optimization. + * A test suite that tests Apache ORC filter API based filter pushdown optimization. + * OrcFilterSuite and HiveOrcFilterSuite is logically duplicated to provide the same test coverage. + * The difference are the packages containing 'Predicate' and 'SearchArgument' classes. + * - OrcFilterSuite uses 'org.apache.orc.storage.ql.io.sarg' package. + * - HiveOrcFilterSuite uses 'org.apache.hadoop.hive.ql.io.sarg' package. 
*/ -class OrcFilterSuite extends QueryTest with OrcTest { +class OrcFilterSuite extends OrcTest with SharedSQLContext { + private def checkFilterPredicate( df: DataFrame, predicate: Predicate, @@ -55,7 +62,7 @@ class OrcFilterSuite extends QueryTest with OrcTest { DataSourceStrategy.selectFilters(maybeRelation.get, maybeAnalyzedPredicate.toSeq) assert(selectedFilters.nonEmpty, "No filter is pushed down") - val maybeFilter = OrcFilters.createFilter(query.schema, selectedFilters.toArray) + val maybeFilter = OrcFilters.createFilter(query.schema, selectedFilters) assert(maybeFilter.isDefined, s"Couldn't generate filter predicate for $selectedFilters") checker(maybeFilter.get) } @@ -99,7 +106,7 @@ class OrcFilterSuite extends QueryTest with OrcTest { DataSourceStrategy.selectFilters(maybeRelation.get, maybeAnalyzedPredicate.toSeq) assert(selectedFilters.nonEmpty, "No filter is pushed down") - val maybeFilter = OrcFilters.createFilter(query.schema, selectedFilters.toArray) + val maybeFilter = OrcFilters.createFilter(query.schema, selectedFilters) assert(maybeFilter.isEmpty, s"Could generate filter predicate for $selectedFilters") } @@ -284,44 +291,55 @@ class OrcFilterSuite extends QueryTest with OrcTest { test("filter pushdown - combinations with logical operators") { withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df => - // Because `ExpressionTree` is not accessible at Hive 1.2.x, this should be checked - // in string form in order to check filter creation including logical operators - // such as `and`, `or` or `not`. So, this function uses `SearchArgument.toString()` - // to produce string expression and then compare it to given string expression below. - // This might have to be changed after Hive version is upgraded. checkFilterPredicate( '_1.isNotNull, - """leaf-0 = (IS_NULL _1) - |expr = (not leaf-0)""".stripMargin.trim + "leaf-0 = (IS_NULL _1), expr = (not leaf-0)" ) checkFilterPredicate( '_1 =!= 1, - """leaf-0 = (IS_NULL _1) - |leaf-1 = (EQUALS _1 1) - |expr = (and (not leaf-0) (not leaf-1))""".stripMargin.trim + "leaf-0 = (IS_NULL _1), leaf-1 = (EQUALS _1 1), expr = (and (not leaf-0) (not leaf-1))" ) checkFilterPredicate( !('_1 < 4), - """leaf-0 = (IS_NULL _1) - |leaf-1 = (LESS_THAN _1 4) - |expr = (and (not leaf-0) (not leaf-1))""".stripMargin.trim + "leaf-0 = (IS_NULL _1), leaf-1 = (LESS_THAN _1 4), expr = (and (not leaf-0) (not leaf-1))" ) checkFilterPredicate( '_1 < 2 || '_1 > 3, - """leaf-0 = (LESS_THAN _1 2) - |leaf-1 = (LESS_THAN_EQUALS _1 3) - |expr = (or leaf-0 (not leaf-1))""".stripMargin.trim + "leaf-0 = (LESS_THAN _1 2), leaf-1 = (LESS_THAN_EQUALS _1 3), " + + "expr = (or leaf-0 (not leaf-1))" ) checkFilterPredicate( '_1 < 2 && '_1 > 3, - """leaf-0 = (IS_NULL _1) - |leaf-1 = (LESS_THAN _1 2) - |leaf-2 = (LESS_THAN_EQUALS _1 3) - |expr = (and (not leaf-0) leaf-1 (not leaf-2))""".stripMargin.trim + "leaf-0 = (IS_NULL _1), leaf-1 = (LESS_THAN _1 2), leaf-2 = (LESS_THAN_EQUALS _1 3), " + + "expr = (and (not leaf-0) leaf-1 (not leaf-2))" ) } } + test("filter pushdown - date") { + val dates = Seq("2017-08-18", "2017-08-19", "2017-08-20", "2017-08-21").map { day => + Date.valueOf(day) + } + withOrcDataFrame(dates.map(Tuple1(_))) { implicit df => + checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate('_1 === dates(0), PredicateLeaf.Operator.EQUALS) + checkFilterPredicate('_1 <=> dates(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate('_1 < dates(1), PredicateLeaf.Operator.LESS_THAN) + 
checkFilterPredicate('_1 > dates(2), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate('_1 <= dates(0), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate('_1 >= dates(3), PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(dates(0)) === '_1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(dates(0)) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(dates(1)) > '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(dates(2)) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(dates(0)) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(dates(3)) <= '_1, PredicateLeaf.Operator.LESS_THAN) + } + } + test("no filter pushdown - non-supported types") { implicit class IntToBinary(int: Int) { def b: Array[Byte] = int.toString.getBytes(StandardCharsets.UTF_8) @@ -334,14 +352,35 @@ class OrcFilterSuite extends QueryTest with OrcTest { withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df => checkNoFilterPredicate('_1 <=> 1.b) } - // DateType - val stringDate = "2015-01-01" - withOrcDataFrame(Seq(Tuple1(Date.valueOf(stringDate)))) { implicit df => - checkNoFilterPredicate('_1 === Date.valueOf(stringDate)) - } // MapType withOrcDataFrame((1 to 4).map(i => Tuple1(Map(i -> i)))) { implicit df => checkNoFilterPredicate('_1.isNotNull) } } + + test("SPARK-12218 Converting conjunctions into ORC SearchArguments") { + import org.apache.spark.sql.sources._ + // The `LessThan` should be converted while the `StringContains` shouldn't + val schema = new StructType( + Array( + StructField("a", IntegerType, nullable = true), + StructField("b", StringType, nullable = true))) + assertResult("leaf-0 = (LESS_THAN a 10), expr = leaf-0") { + OrcFilters.createFilter(schema, Array( + LessThan("a", 10), + StringContains("b", "prefix") + )).get.toString + } + + // The `LessThan` should be converted while the whole inner `And` shouldn't + assertResult("leaf-0 = (LESS_THAN a 10), expr = leaf-0") { + OrcFilters.createFilter(schema, Array( + LessThan("a", 10), + Not(And( + GreaterThan("a", 1), + StringContains("b", "prefix") + )) + )).get.toString + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcPartitionDiscoverySuite.scala similarity index 82% rename from sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcPartitionDiscoverySuite.scala rename to sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcPartitionDiscoverySuite.scala index d1ce3f1e2f058..d1911ea7f32a9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcPartitionDiscoverySuite.scala @@ -15,19 +15,12 @@ * limitations under the License. */ -package org.apache.spark.sql.hive.orc +package org.apache.spark.sql.execution.datasources.orc import java.io.File -import scala.reflect.ClassTag -import scala.reflect.runtime.universe.TypeTag - -import org.apache.hadoop.hive.conf.HiveConf.ConfVars -import org.scalatest.BeforeAndAfterAll - import org.apache.spark.sql._ -import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.util.Utils +import org.apache.spark.sql.test.SharedSQLContext // The data where the partitioning key exists only in the directory structure. 
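// A hedged sketch of the directory-layout discovery exercised below (paths are
// hypothetical and a SparkSession `spark` is assumed): the pi/ps partition
// columns are inferred from path segments such as base/pi=1/ps=foo, not read
// from the ORC files themselves.
//
//   spark.range(3).toDF("intField").write.orc("/tmp/base/pi=1/ps=foo")
//   spark.range(3).toDF("intField").write.orc("/tmp/base/pi=2/ps=bar")
//   spark.read.orc("/tmp/base").printSchema()
//   // schema: intField plus discovered partition columns pi (int) and ps (string)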
case class OrcParData(intField: Int, stringField: String) @@ -35,28 +28,8 @@ case class OrcParData(intField: Int, stringField: String) // The data that also includes the partitioning key case class OrcParDataWithKey(intField: Int, pi: Int, stringField: String, ps: String) -// TODO This test suite duplicates ParquetPartitionDiscoverySuite a lot -class OrcPartitionDiscoverySuite extends QueryTest with TestHiveSingleton with BeforeAndAfterAll { - import spark._ - import spark.implicits._ - - val defaultPartitionName = ConfVars.DEFAULTPARTITIONNAME.defaultStrVal - - def withTempDir(f: File => Unit): Unit = { - val dir = Utils.createTempDir().getCanonicalFile - try f(dir) finally Utils.deleteRecursively(dir) - } - - def makeOrcFile[T <: Product: ClassTag: TypeTag]( - data: Seq[T], path: File): Unit = { - data.toDF().write.mode("overwrite").orc(path.getCanonicalPath) - } - - - def makeOrcFile[T <: Product: ClassTag: TypeTag]( - df: DataFrame, path: File): Unit = { - df.write.mode("overwrite").orc(path.getCanonicalPath) - } +abstract class OrcPartitionDiscoveryTest extends OrcTest { + val defaultPartitionName = "__HIVE_DEFAULT_PARTITION__" protected def withTempTable(tableName: String)(f: => Unit): Unit = { try f finally spark.catalog.dropTempView(tableName) @@ -90,7 +63,7 @@ class OrcPartitionDiscoverySuite extends QueryTest with TestHiveSingleton with B makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)) } - read.orc(base.getCanonicalPath).createOrReplaceTempView("t") + spark.read.orc(base.getCanonicalPath).createOrReplaceTempView("t") withTempTable("t") { checkAnswer( @@ -137,7 +110,7 @@ class OrcPartitionDiscoverySuite extends QueryTest with TestHiveSingleton with B makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)) } - read.orc(base.getCanonicalPath).createOrReplaceTempView("t") + spark.read.orc(base.getCanonicalPath).createOrReplaceTempView("t") withTempTable("t") { checkAnswer( @@ -186,8 +159,8 @@ class OrcPartitionDiscoverySuite extends QueryTest with TestHiveSingleton with B makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)) } - read - .option(ConfVars.DEFAULTPARTITIONNAME.varname, defaultPartitionName) + spark.read + .option("hive.exec.default.partition.name", defaultPartitionName) .orc(base.getCanonicalPath) .createOrReplaceTempView("t") @@ -228,8 +201,8 @@ class OrcPartitionDiscoverySuite extends QueryTest with TestHiveSingleton with B makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)) } - read - .option(ConfVars.DEFAULTPARTITIONNAME.varname, defaultPartitionName) + spark.read + .option("hive.exec.default.partition.name", defaultPartitionName) .orc(base.getCanonicalPath) .createOrReplaceTempView("t") @@ -253,3 +226,4 @@ class OrcPartitionDiscoverySuite extends QueryTest with TestHiveSingleton with B } } +class OrcPartitionDiscoverySuite extends OrcPartitionDiscoveryTest with SharedSQLContext diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala similarity index 66% rename from sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala rename to sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala index 1fa9091f967a3..f58c331f33ca8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala @@ -15,24 +15,28 @@ * 
limitations under the License. */ -package org.apache.spark.sql.hive.orc +package org.apache.spark.sql.execution.datasources.orc +import java.io.File import java.nio.charset.StandardCharsets import java.sql.Timestamp import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.hive.ql.io.orc.{OrcStruct, SparkOrcNewRecordReader} +import org.apache.hadoop.fs.Path +import org.apache.hadoop.mapreduce.{JobID, TaskAttemptID, TaskID, TaskType} +import org.apache.hadoop.mapreduce.lib.input.FileSplit +import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl +import org.apache.orc.{OrcConf, OrcFile} import org.apache.orc.OrcConf.COMPRESS -import org.scalatest.BeforeAndAfterAll +import org.apache.orc.mapred.OrcStruct +import org.apache.orc.mapreduce.OrcInputFormat +import org.apache.spark.SparkException import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.catalog.HiveTableRelation -import org.apache.spark.sql.execution.datasources.{LogicalRelation, RecordReaderIterator} -import org.apache.spark.sql.hive.HiveUtils -import org.apache.spark.sql.hive.test.TestHive._ -import org.apache.spark.sql.hive.test.TestHive.implicits._ +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, RecordReaderIterator} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types.{IntegerType, StructType} import org.apache.spark.util.Utils @@ -57,7 +61,8 @@ case class Contact(name: String, phone: String) case class Person(name: String, age: Int, contacts: Seq[Contact]) -class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest { +abstract class OrcQueryTest extends OrcTest { + import testImplicits._ test("Read/write All Types") { val data = (0 to 255).map { i => @@ -73,7 +78,7 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest { test("Read/write binary data") { withOrcFile(BinaryData("test".getBytes(StandardCharsets.UTF_8)) :: Nil) { file => - val bytes = read.orc(file).head().getAs[Array[Byte]](0) + val bytes = spark.read.orc(file).head().getAs[Array[Byte]](0) assert(new String(bytes, StandardCharsets.UTF_8) === "test") } } @@ -91,7 +96,7 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest { withOrcFile(data) { file => checkAnswer( - read.orc(file), + spark.read.orc(file), data.toDF().collect()) } } @@ -172,7 +177,7 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest { withOrcFile(data) { file => checkAnswer( - read.orc(file), + spark.read.orc(file), Row(Seq.fill(5)(null): _*)) } } @@ -183,9 +188,13 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest { spark.range(0, 10).write .option(COMPRESS.getAttribute, "ZLIB") .orc(file.getCanonicalPath) - val expectedCompressionKind = - OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression - assert("ZLIB" === expectedCompressionKind.name()) + + val maybeOrcFile = file.listFiles().find(_.getName.endsWith(".zlib.orc")) + assert(maybeOrcFile.isDefined) + + val orcFilePath = new Path(maybeOrcFile.get.getAbsolutePath) + val conf = OrcFile.readerOptions(new Configuration()) + assert("ZLIB" === OrcFile.createReader(orcFilePath, conf).getCompressionKind.name) } // `compression` overrides `orc.compress`. 
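// A hedged, self-contained sketch of the compression-check pattern the tests in
// this suite use against the org.apache.orc reader API shown in the diff (the
// output path is hypothetical and a SparkSession `spark` is assumed):
//
//   import java.io.File
//   import org.apache.hadoop.conf.Configuration
//   import org.apache.hadoop.fs.Path
//   import org.apache.orc.OrcFile
//
//   spark.range(10).write.option("compression", "ZLIB").orc("/tmp/zlib_out")
//   val orcFile = new File("/tmp/zlib_out").listFiles()
//     .find(_.getName.endsWith(".zlib.orc")).get
//   val reader = OrcFile.createReader(new Path(orcFile.getAbsolutePath),
//     OrcFile.readerOptions(new Configuration()))
//   assert(reader.getCompressionKind.name == "ZLIB")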
@@ -194,9 +203,13 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest { .option("compression", "ZLIB") .option(COMPRESS.getAttribute, "SNAPPY") .orc(file.getCanonicalPath) - val expectedCompressionKind = - OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression - assert("ZLIB" === expectedCompressionKind.name()) + + val maybeOrcFile = file.listFiles().find(_.getName.endsWith(".zlib.orc")) + assert(maybeOrcFile.isDefined) + + val orcFilePath = new Path(maybeOrcFile.get.getAbsolutePath) + val conf = OrcFile.readerOptions(new Configuration()) + assert("ZLIB" === OrcFile.createReader(orcFilePath, conf).getCompressionKind.name) } } @@ -206,39 +219,39 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest { spark.range(0, 10).write .option("compression", "ZLIB") .orc(file.getCanonicalPath) - val expectedCompressionKind = - OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression - assert("ZLIB" === expectedCompressionKind.name()) + + val maybeOrcFile = file.listFiles().find(_.getName.endsWith(".zlib.orc")) + assert(maybeOrcFile.isDefined) + + val orcFilePath = new Path(maybeOrcFile.get.getAbsolutePath) + val conf = OrcFile.readerOptions(new Configuration()) + assert("ZLIB" === OrcFile.createReader(orcFilePath, conf).getCompressionKind.name) } withTempPath { file => spark.range(0, 10).write .option("compression", "SNAPPY") .orc(file.getCanonicalPath) - val expectedCompressionKind = - OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression - assert("SNAPPY" === expectedCompressionKind.name()) + + val maybeOrcFile = file.listFiles().find(_.getName.endsWith(".snappy.orc")) + assert(maybeOrcFile.isDefined) + + val orcFilePath = new Path(maybeOrcFile.get.getAbsolutePath) + val conf = OrcFile.readerOptions(new Configuration()) + assert("SNAPPY" === OrcFile.createReader(orcFilePath, conf).getCompressionKind.name) } withTempPath { file => spark.range(0, 10).write .option("compression", "NONE") .orc(file.getCanonicalPath) - val expectedCompressionKind = - OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression - assert("NONE" === expectedCompressionKind.name()) - } - } - // Following codec is not supported in Hive 1.2.1, ignore it now - ignore("LZO compression options for writing to an ORC file not supported in Hive 1.2.1") { - withTempPath { file => - spark.range(0, 10).write - .option("compression", "LZO") - .orc(file.getCanonicalPath) - val expectedCompressionKind = - OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression - assert("LZO" === expectedCompressionKind.name()) + val maybeOrcFile = file.listFiles().find(_.getName.endsWith(".orc")) + assert(maybeOrcFile.isDefined) + + val orcFilePath = new Path(maybeOrcFile.get.getAbsolutePath) + val conf = OrcFile.readerOptions(new Configuration()) + assert("NONE" === OrcFile.createReader(orcFilePath, conf).getCompressionKind.name) } } @@ -256,22 +269,28 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest { test("appending") { val data = (0 until 10).map(i => (i, i.toString)) - createDataFrame(data).toDF("c1", "c2").createOrReplaceTempView("tmp") + spark.createDataFrame(data).toDF("c1", "c2").createOrReplaceTempView("tmp") withOrcTable(data, "t") { sql("INSERT INTO TABLE t SELECT * FROM tmp") - checkAnswer(table("t"), (data ++ data).map(Row.fromTuple)) + checkAnswer(spark.table("t"), (data ++ data).map(Row.fromTuple)) } - sessionState.catalog.dropTable(TableIdentifier("tmp"), ignoreIfNotExists = true, purge = 
false) + spark.sessionState.catalog.dropTable( + TableIdentifier("tmp"), + ignoreIfNotExists = true, + purge = false) } test("overwriting") { val data = (0 until 10).map(i => (i, i.toString)) - createDataFrame(data).toDF("c1", "c2").createOrReplaceTempView("tmp") + spark.createDataFrame(data).toDF("c1", "c2").createOrReplaceTempView("tmp") withOrcTable(data, "t") { sql("INSERT OVERWRITE TABLE t SELECT * FROM tmp") - checkAnswer(table("t"), data.map(Row.fromTuple)) + checkAnswer(spark.table("t"), data.map(Row.fromTuple)) } - sessionState.catalog.dropTable(TableIdentifier("tmp"), ignoreIfNotExists = true, purge = false) + spark.sessionState.catalog.dropTable( + TableIdentifier("tmp"), + ignoreIfNotExists = true, + purge = false) } test("self-join") { @@ -334,60 +353,16 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest { withTempPath { dir => val path = dir.getCanonicalPath - spark.range(0, 10).select('id as "Acol").write.format("orc").save(path) - spark.read.format("orc").load(path).schema("Acol") + spark.range(0, 10).select('id as "Acol").write.orc(path) + spark.read.orc(path).schema("Acol") intercept[IllegalArgumentException] { - spark.read.format("orc").load(path).schema("acol") + spark.read.orc(path).schema("acol") } - checkAnswer(spark.read.format("orc").load(path).select("acol").sort("acol"), + checkAnswer(spark.read.orc(path).select("acol").sort("acol"), (0 until 10).map(Row(_))) } } - test("SPARK-8501: Avoids discovery schema from empty ORC files") { - withTempPath { dir => - val path = dir.getCanonicalPath - - withTable("empty_orc") { - withTempView("empty", "single") { - spark.sql( - s"""CREATE TABLE empty_orc(key INT, value STRING) - |STORED AS ORC - |LOCATION '${dir.toURI}' - """.stripMargin) - - val emptyDF = Seq.empty[(Int, String)].toDF("key", "value").coalesce(1) - emptyDF.createOrReplaceTempView("empty") - - // This creates 1 empty ORC file with Hive ORC SerDe. We are using this trick because - // Spark SQL ORC data source always avoids write empty ORC files. - spark.sql( - s"""INSERT INTO TABLE empty_orc - |SELECT key, value FROM empty - """.stripMargin) - - val errorMessage = intercept[AnalysisException] { - spark.read.orc(path) - }.getMessage - - assert(errorMessage.contains("Unable to infer schema for ORC")) - - val singleRowDF = Seq((0, "foo")).toDF("key", "value").coalesce(1) - singleRowDF.createOrReplaceTempView("single") - - spark.sql( - s"""INSERT INTO TABLE empty_orc - |SELECT key, value FROM single - """.stripMargin) - - val df = spark.read.orc(path) - assert(df.schema === singleRowDF.schema.asNullable) - checkAnswer(df, singleRowDF) - } - } - } - } - test("SPARK-10623 Enable ORC PPD") { withTempPath { dir => withSQLConf(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key -> "true") { @@ -405,7 +380,7 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest { } // It needs to repartition data so that we can have several ORC files // in order to skip stripes in ORC. 
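// Hedged sketch of why the repartition matters for the pushdown checks in this
// suite (stripSparkFilter comes from SQLTestUtils; the path and row counts are
// illustrative): each of the 10 partitions becomes its own ORC file, so with
// spark.sql.orc.filterPushdown enabled the reader can skip whole files/stripes
// whose min/max statistics rule out the predicate.
//
//   val df = spark.read.orc(path).where("a == 2")
//   val actual = stripSparkFilter(df).count()  // rows surviving ORC-side skipping only
//   assert(actual < 10)                        // strictly fewer than the full row count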
- createDataFrame(data).toDF("a", "b").repartition(10).write.orc(path) + spark.createDataFrame(data).toDF("a", "b").repartition(10).write.orc(path) val df = spark.read.orc(path) def checkPredicate(pred: Column, answer: Seq[Row]): Unit = { @@ -440,77 +415,6 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest { } } - test("Verify the ORC conversion parameter: CONVERT_METASTORE_ORC") { - withTempView("single") { - val singleRowDF = Seq((0, "foo")).toDF("key", "value") - singleRowDF.createOrReplaceTempView("single") - - Seq("true", "false").foreach { orcConversion => - withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> orcConversion) { - withTable("dummy_orc") { - withTempPath { dir => - val path = dir.getCanonicalPath - spark.sql( - s""" - |CREATE TABLE dummy_orc(key INT, value STRING) - |STORED AS ORC - |LOCATION '${dir.toURI}' - """.stripMargin) - - spark.sql( - s""" - |INSERT INTO TABLE dummy_orc - |SELECT key, value FROM single - """.stripMargin) - - val df = spark.sql("SELECT * FROM dummy_orc WHERE key=0") - checkAnswer(df, singleRowDF) - - val queryExecution = df.queryExecution - if (orcConversion == "true") { - queryExecution.analyzed.collectFirst { - case _: LogicalRelation => () - }.getOrElse { - fail(s"Expecting the query plan to convert orc to data sources, " + - s"but got:\n$queryExecution") - } - } else { - queryExecution.analyzed.collectFirst { - case _: HiveTableRelation => () - }.getOrElse { - fail(s"Expecting no conversion from orc to data sources, " + - s"but got:\n$queryExecution") - } - } - } - } - } - } - } - } - - test("converted ORC table supports resolving mixed case field") { - withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { - withTable("dummy_orc") { - withTempPath { dir => - val df = spark.range(5).selectExpr("id", "id as valueField", "id as partitionValue") - df.write - .partitionBy("partitionValue") - .mode("overwrite") - .orc(dir.getAbsolutePath) - - spark.sql(s""" - |create external table dummy_orc (id long, valueField long) - |partitioned by (partitionValue int) - |stored as orc - |location "${dir.toURI}"""".stripMargin) - spark.sql(s"msck repair table dummy_orc") - checkAnswer(spark.sql("select * from dummy_orc"), df) - } - } - } - } - test("SPARK-14962 Produce correct results on array type with isnotnull") { withSQLConf(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key -> "true") { val data = (0 until 10).map(i => Tuple1(Array(i))) @@ -544,7 +448,8 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest { withTempPath { file => // It needs to repartition data so that we can have several ORC files // in order to skip stripes in ORC. - createDataFrame(data).toDF("a").repartition(10).write.orc(file.getCanonicalPath) + spark.createDataFrame(data).toDF("a").repartition(10) + .write.orc(file.getCanonicalPath) val df = spark.read.orc(file.getCanonicalPath).where("a == 2") val actual = stripSparkFilter(df).count() @@ -563,7 +468,8 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest { withTempPath { file => // It needs to repartition data so that we can have several ORC files // in order to skip stripes in ORC. 
- createDataFrame(data).toDF("a").repartition(10).write.orc(file.getCanonicalPath) + spark.createDataFrame(data).toDF("a").repartition(10) + .write.orc(file.getCanonicalPath) val df = spark.read.orc(file.getCanonicalPath).where(s"a == '$timeString'") val actual = stripSparkFilter(df).count() @@ -596,14 +502,18 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest { test("Empty schema does not read data from ORC file") { val data = Seq((1, 1), (2, 2)) withOrcFile(data) { path => - val requestedSchema = StructType(Nil) val conf = new Configuration() - val physicalSchema = OrcFileOperator.readSchema(Seq(path), Some(conf)).get - OrcFileFormat.setRequiredColumns(conf, physicalSchema, requestedSchema) - val maybeOrcReader = OrcFileOperator.getFileReader(path, Some(conf)) - assert(maybeOrcReader.isDefined) - val orcRecordReader = new SparkOrcNewRecordReader( - maybeOrcReader.get, conf, 0, maybeOrcReader.get.getContentLength) + conf.set(OrcConf.INCLUDE_COLUMNS.getAttribute, "") + conf.setBoolean("hive.io.file.read.all.columns", false) + + val orcRecordReader = { + val file = new File(path).listFiles().find(_.getName.endsWith(".snappy.orc")).head + val split = new FileSplit(new Path(file.toURI), 0, file.length, Array.empty[String]) + val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0) + val hadoopAttemptContext = new TaskAttemptContextImpl(conf, attemptId) + val oif = new OrcInputFormat[OrcStruct] + oif.createRecordReader(split, hadoopAttemptContext) + } val recordsIterator = new RecordReaderIterator[OrcStruct](orcRecordReader) try { @@ -614,11 +524,135 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest { } } - test("read from multiple orc input paths") { - val path1 = Utils.createTempDir() - val path2 = Utils.createTempDir() - makeOrcFile((1 to 10).map(Tuple1.apply), path1) - makeOrcFile((1 to 10).map(Tuple1.apply), path2) - assertResult(20)(read.orc(path1.getCanonicalPath, path2.getCanonicalPath).count()) - } + test("read from multiple orc input paths") { + val path1 = Utils.createTempDir() + val path2 = Utils.createTempDir() + makeOrcFile((1 to 10).map(Tuple1.apply), path1) + makeOrcFile((1 to 10).map(Tuple1.apply), path2) + val df = spark.read.orc(path1.getCanonicalPath, path2.getCanonicalPath) + assert(df.count() == 20) + } + + test("Enabling/disabling ignoreCorruptFiles") { + def testIgnoreCorruptFiles(): Unit = { + withTempDir { dir => + val basePath = dir.getCanonicalPath + spark.range(1).toDF("a").write.orc(new Path(basePath, "first").toString) + spark.range(1, 2).toDF("a").write.orc(new Path(basePath, "second").toString) + spark.range(2, 3).toDF("a").write.json(new Path(basePath, "third").toString) + val df = spark.read.orc( + new Path(basePath, "first").toString, + new Path(basePath, "second").toString, + new Path(basePath, "third").toString) + checkAnswer(df, Seq(Row(0), Row(1))) + } + } + + def testIgnoreCorruptFilesWithoutSchemaInfer(): Unit = { + withTempDir { dir => + val basePath = dir.getCanonicalPath + spark.range(1).toDF("a").write.orc(new Path(basePath, "first").toString) + spark.range(1, 2).toDF("a").write.orc(new Path(basePath, "second").toString) + spark.range(2, 3).toDF("a").write.json(new Path(basePath, "third").toString) + val df = spark.read.schema("a long").orc( + new Path(basePath, "first").toString, + new Path(basePath, "second").toString, + new Path(basePath, "third").toString) + checkAnswer(df, Seq(Row(0), Row(1))) + } + } + + withSQLConf(SQLConf.IGNORE_CORRUPT_FILES.key -> "true") { + 
testIgnoreCorruptFiles()
+      testIgnoreCorruptFilesWithoutSchemaInfer()
+    }
+
+    withSQLConf(SQLConf.IGNORE_CORRUPT_FILES.key -> "false") {
+      val m1 = intercept[SparkException] {
+        testIgnoreCorruptFiles()
+      }.getMessage
+      assert(m1.contains("Could not read footer for file"))
+      val m2 = intercept[SparkException] {
+        testIgnoreCorruptFilesWithoutSchemaInfer()
+      }.getMessage
+      assert(m2.contains("Malformed ORC file"))
+    }
+  }
+}
+
+class OrcQuerySuite extends OrcQueryTest with SharedSQLContext {
+  import testImplicits._
+
+  test("LZO compression options for writing to an ORC file") {
+    withTempPath { file =>
+      spark.range(0, 10).write
+        .option("compression", "LZO")
+        .orc(file.getCanonicalPath)
+
+      val maybeOrcFile = file.listFiles().find(_.getName.endsWith(".lzo.orc"))
+      assert(maybeOrcFile.isDefined)
+
+      val orcFilePath = new Path(maybeOrcFile.get.getAbsolutePath)
+      val conf = OrcFile.readerOptions(new Configuration())
+      assert("LZO" === OrcFile.createReader(orcFilePath, conf).getCompressionKind.name)
+    }
+  }
+
+  test("Schema discovery on empty ORC files") {
+    // SPARK-8501 is fixed.
+    withTempPath { dir =>
+      val path = dir.getCanonicalPath
+
+      withTable("empty_orc") {
+        withTempView("empty", "single") {
+          spark.sql(
+            s"""CREATE TABLE empty_orc(key INT, value STRING)
+               |USING ORC
+               |LOCATION '${dir.toURI}'
+             """.stripMargin)
+
+          val emptyDF = Seq.empty[(Int, String)].toDF("key", "value").coalesce(1)
+          emptyDF.createOrReplaceTempView("empty")
+
+          // This creates 1 empty ORC file with ORC SerDe. We are using this trick because
+          // the Spark SQL ORC data source always avoids writing empty ORC files.
+          spark.sql(
+            s"""INSERT INTO TABLE empty_orc
+               |SELECT key, value FROM empty
+             """.stripMargin)
+
+          val df = spark.read.orc(path)
+          assert(df.schema === emptyDF.schema.asNullable)
+          checkAnswer(df, emptyDF)
+        }
+      }
+    }
+  }
+
+  test("SPARK-21791 ORC should support column names with dot") {
+    withTempDir { dir =>
+      val path = new File(dir, "orc").getCanonicalPath
+      Seq(Some(1), None).toDF("col.dots").write.orc(path)
+      assert(spark.read.orc(path).collect().length == 2)
+    }
+  }
+
+  test("SPARK-20728 Make ORCFileFormat configurable between sql/hive and sql/core") {
+    withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> "hive") {
+      val e = intercept[AnalysisException] {
+        sql("CREATE TABLE spark_20728(a INT) USING ORC")
+      }
+      assert(e.message.contains("Hive built-in ORC data source must be used with Hive support"))
+    }
+
+    withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> "native") {
+      withTable("spark_20728") {
+        sql("CREATE TABLE spark_20728(a INT) USING ORC")
+        val fileFormat = sql("SELECT * FROM spark_20728").queryExecution.analyzed.collectFirst {
+          case l: LogicalRelation => l.relation.asInstanceOf[HadoopFsRelation].fileFormat.getClass
+        }
+        assert(fileFormat == Some(classOf[OrcFileFormat]))
+      }
+    }
+  }
 }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala
similarity index 63%
rename from sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala
rename to sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala
index 2a086be57f517..6f5f2fd795f74 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala
@@ -15,7 +15,7 @@
  * limitations under the License.
*/ -package org.apache.spark.sql.hive.orc +package org.apache.spark.sql.execution.datasources.orc import java.io.File import java.util.Locale @@ -23,50 +23,30 @@ import java.util.Locale import org.apache.orc.OrcConf.COMPRESS import org.scalatest.BeforeAndAfterAll -import org.apache.spark.sql.{QueryTest, Row} -import org.apache.spark.sql.execution.datasources.orc.OrcOptions -import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.Row import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources._ -import org.apache.spark.sql.types._ +import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.util.Utils case class OrcData(intField: Int, stringField: String) -abstract class OrcSuite extends QueryTest with TestHiveSingleton with BeforeAndAfterAll { - import spark._ +abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { + import testImplicits._ var orcTableDir: File = null var orcTableAsDir: File = null - override def beforeAll(): Unit = { + protected override def beforeAll(): Unit = { super.beforeAll() orcTableAsDir = Utils.createTempDir("orctests", "sparksql") - - // Hack: to prepare orc data files using hive external tables orcTableDir = Utils.createTempDir("orctests", "sparksql") - import org.apache.spark.sql.hive.test.TestHive.implicits._ sparkContext .makeRDD(1 to 10) .map(i => OrcData(i, s"part-$i")) .toDF() - .createOrReplaceTempView(s"orc_temp_table") - - sql( - s"""CREATE EXTERNAL TABLE normal_orc( - | intField INT, - | stringField STRING - |) - |STORED AS ORC - |LOCATION '${orcTableAsDir.toURI}' - """.stripMargin) - - sql( - s"""INSERT INTO TABLE normal_orc - |SELECT intField, stringField FROM orc_temp_table - """.stripMargin) + .createOrReplaceTempView("orc_temp_table") } test("create temporary orc table") { @@ -152,56 +132,13 @@ abstract class OrcSuite extends QueryTest with TestHiveSingleton with BeforeAndA } test("SPARK-18433: Improve DataSource option keys to be more case-insensitive") { - val conf = sqlContext.sessionState.conf + val conf = spark.sessionState.conf val option = new OrcOptions(Map(COMPRESS.getAttribute.toUpperCase(Locale.ROOT) -> "NONE"), conf) assert(option.compressionCodec == "NONE") } - test("SPARK-19459/SPARK-18220: read char/varchar column written by Hive") { - val location = Utils.createTempDir() - val uri = location.toURI - try { - hiveClient.runSqlHive("USE default") - hiveClient.runSqlHive( - """ - |CREATE EXTERNAL TABLE hive_orc( - | a STRING, - | b CHAR(10), - | c VARCHAR(10), - | d ARRAY) - |STORED AS orc""".stripMargin) - // Hive throws an exception if I assign the location in the create table statement. - hiveClient.runSqlHive( - s"ALTER TABLE hive_orc SET LOCATION '$uri'") - hiveClient.runSqlHive( - """ - |INSERT INTO TABLE hive_orc - |SELECT 'a', 'b', 'c', ARRAY(CAST('d' AS CHAR(3))) - |FROM (SELECT 1) t""".stripMargin) - - // We create a different table in Spark using the same schema which points to - // the same location. 
- spark.sql( - s""" - |CREATE EXTERNAL TABLE spark_orc( - | a STRING, - | b CHAR(10), - | c VARCHAR(10), - | d ARRAY) - |STORED AS orc - |LOCATION '$uri'""".stripMargin) - val result = Row("a", "b ", "c", Seq("d ")) - checkAnswer(spark.table("hive_orc"), result) - checkAnswer(spark.table("spark_orc"), result) - } finally { - hiveClient.runSqlHive("DROP TABLE IF EXISTS hive_orc") - hiveClient.runSqlHive("DROP TABLE IF EXISTS spark_orc") - Utils.deleteRecursively(location) - } - } - test("SPARK-21839: Add SQL config for ORC compression") { - val conf = sqlContext.sessionState.conf + val conf = spark.sessionState.conf // Test if the default of spark.sql.orc.compression.codec is snappy assert(new OrcOptions(Map.empty[String, String], conf).compressionCodec == "SNAPPY") @@ -225,13 +162,28 @@ abstract class OrcSuite extends QueryTest with TestHiveSingleton with BeforeAndA } } -class OrcSourceSuite extends OrcSuite { - override def beforeAll(): Unit = { +class OrcSourceSuite extends OrcSuite with SharedSQLContext { + + protected override def beforeAll(): Unit = { super.beforeAll() + sql( + s"""CREATE TABLE normal_orc( + | intField INT, + | stringField STRING + |) + |USING ORC + |LOCATION '${orcTableAsDir.toURI}' + """.stripMargin) + + sql( + s"""INSERT INTO TABLE normal_orc + |SELECT intField, stringField FROM orc_temp_table + """.stripMargin) + spark.sql( s"""CREATE TEMPORARY VIEW normal_orc_source - |USING org.apache.spark.sql.hive.orc + |USING ORC |OPTIONS ( | PATH '${new File(orcTableAsDir.getAbsolutePath).toURI}' |) @@ -239,43 +191,10 @@ class OrcSourceSuite extends OrcSuite { spark.sql( s"""CREATE TEMPORARY VIEW normal_orc_as_source - |USING org.apache.spark.sql.hive.orc + |USING ORC |OPTIONS ( | PATH '${new File(orcTableAsDir.getAbsolutePath).toURI}' |) """.stripMargin) } - - test("SPARK-12218 Converting conjunctions into ORC SearchArguments") { - // The `LessThan` should be converted while the `StringContains` shouldn't - val schema = new StructType( - Array( - StructField("a", IntegerType, nullable = true), - StructField("b", StringType, nullable = true))) - assertResult( - """leaf-0 = (LESS_THAN a 10) - |expr = leaf-0 - """.stripMargin.trim - ) { - OrcFilters.createFilter(schema, Array( - LessThan("a", 10), - StringContains("b", "prefix") - )).get.toString - } - - // The `LessThan` should be converted while the whole inner `And` shouldn't - assertResult( - """leaf-0 = (LESS_THAN a 10) - |expr = leaf-0 - """.stripMargin.trim - ) { - OrcFilters.createFilter(schema, Array( - LessThan("a", 10), - Not(And( - GreaterThan("a", 1), - StringContains("b", "prefix") - )) - )).get.toString - } - } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala similarity index 71% rename from sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcTest.scala rename to sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala index a2f08c5ba72c6..38b34a03e3e4c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala @@ -15,20 +15,51 @@ * limitations under the License. 
 */
-package org.apache.spark.sql.hive.orc
+package org.apache.spark.sql.execution.datasources.orc

 import java.io.File

 import scala.reflect.ClassTag
 import scala.reflect.runtime.universe.TypeTag

+import org.scalatest.BeforeAndAfterAll
+
 import org.apache.spark.sql._
-import org.apache.spark.sql.hive.test.TestHiveSingleton
+import org.apache.spark.sql.internal.SQLConf.ORC_IMPLEMENTATION
 import org.apache.spark.sql.test.SQLTestUtils

-private[sql] trait OrcTest extends SQLTestUtils with TestHiveSingleton {
+/**
+ * OrcTest
+ *   -> OrcSuite
+ *       -> OrcSourceSuite
+ *       -> HiveOrcSourceSuite
+ *   -> OrcQueryTest
+ *       -> OrcQuerySuite
+ *       -> HiveOrcQuerySuite
+ *   -> OrcPartitionDiscoveryTest
+ *       -> OrcPartitionDiscoverySuite
+ *       -> HiveOrcPartitionDiscoverySuite
+ *   -> OrcFilterSuite
+ *   -> HiveOrcFilterSuite
+ */
+abstract class OrcTest extends QueryTest with SQLTestUtils with BeforeAndAfterAll {
   import testImplicits._

+  val orcImp: String = "native"
+
+  private var originalConfORCImplementation = "native"
+
+  protected override def beforeAll(): Unit = {
+    super.beforeAll()
+    originalConfORCImplementation = spark.conf.get(ORC_IMPLEMENTATION)
+    spark.conf.set(ORC_IMPLEMENTATION.key, orcImp)
+  }
+
+  protected override def afterAll(): Unit = {
+    spark.conf.set(ORC_IMPLEMENTATION.key, originalConfORCImplementation)
+    super.afterAll()
+  }
+
   /**
    * Writes `data` to a Orc file, which is then passed to `f` and will be deleted after `f`
    * returns.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCompressionCodecPrecedenceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCompressionCodecPrecedenceSuite.scala
new file mode 100644
index 0000000000000..ed8fd2b453456
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCompressionCodecPrecedenceSuite.scala
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.parquet
+
+import java.io.File
+
+import scala.collection.JavaConverters._
+
+import org.apache.hadoop.fs.Path
+import org.apache.parquet.hadoop.ParquetOutputFormat
+
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSQLContext
+
+class ParquetCompressionCodecPrecedenceSuite extends ParquetTest with SharedSQLContext {
+  test("Test `spark.sql.parquet.compression.codec` config") {
+    Seq("NONE", "UNCOMPRESSED", "SNAPPY", "GZIP", "LZO").foreach { c =>
+      withSQLConf(SQLConf.PARQUET_COMPRESSION.key -> c) {
+        val expected = if (c == "NONE") "UNCOMPRESSED" else c
+        val option = new ParquetOptions(Map.empty[String, String], spark.sessionState.conf)
+        assert(option.compressionCodecClassName == expected)
+      }
+    }
+  }
+
+  test("[SPARK-21786] Test Acquiring 'compressionCodecClassName' for parquet in right order.") {
+    // When "compression" is configured, it should be the first choice.
+    withSQLConf(SQLConf.PARQUET_COMPRESSION.key -> "snappy") {
+      val props = Map("compression" -> "uncompressed", ParquetOutputFormat.COMPRESSION -> "gzip")
+      val option = new ParquetOptions(props, spark.sessionState.conf)
+      assert(option.compressionCodecClassName == "UNCOMPRESSED")
+    }
+
+    // When "compression" is not configured, "parquet.compression" should be the preferred choice.
+    withSQLConf(SQLConf.PARQUET_COMPRESSION.key -> "snappy") {
+      val props = Map(ParquetOutputFormat.COMPRESSION -> "gzip")
+      val option = new ParquetOptions(props, spark.sessionState.conf)
+      assert(option.compressionCodecClassName == "GZIP")
+    }
+
+    // When neither "compression" nor "parquet.compression" is configured,
+    // spark.sql.parquet.compression.codec should be the right choice.
+    withSQLConf(SQLConf.PARQUET_COMPRESSION.key -> "snappy") {
+      val props = Map.empty[String, String]
+      val option = new ParquetOptions(props, spark.sessionState.conf)
+      assert(option.compressionCodecClassName == "SNAPPY")
+    }
+  }
+
+  private def getTableCompressionCodec(path: String): Seq[String] = {
+    val hadoopConf = spark.sessionState.newHadoopConf()
+    val codecs = for {
+      footer <- readAllFootersWithoutSummaryFiles(new Path(path), hadoopConf)
+      block <- footer.getParquetMetadata.getBlocks.asScala
+      column <- block.getColumns.asScala
+    } yield column.getCodec.name()
+    codecs.distinct
+  }
+
+  private def createTableWithCompression(
+      tableName: String,
+      isPartitioned: Boolean,
+      compressionCodec: String,
+      rootDir: File): Unit = {
+    val options =
+      s"""
+        |OPTIONS('path'='${rootDir.toURI.toString.stripSuffix("/")}/$tableName',
+        |'parquet.compression'='$compressionCodec')
+       """.stripMargin
+    val partitionCreate = if (isPartitioned) "PARTITIONED BY (p)" else ""
+    sql(
+      s"""
+        |CREATE TABLE $tableName USING Parquet $options $partitionCreate
+        |AS SELECT 1 AS col1, 2 AS p
+       """.stripMargin)
+  }
+
+  private def checkCompressionCodec(compressionCodec: String, isPartitioned: Boolean): Unit = {
+    withTempDir { tmpDir =>
+      val tempTableName = "TempParquetTable"
+      withTable(tempTableName) {
+        createTableWithCompression(tempTableName, isPartitioned, compressionCodec, tmpDir)
+        val partitionPath = if (isPartitioned) "p=2" else ""
+        val path = s"${tmpDir.getPath.stripSuffix("/")}/$tempTableName/$partitionPath"
+        val realCompressionCodecs = getTableCompressionCodec(path)
+        assert(realCompressionCodecs.forall(_ == compressionCodec))
+      }
+    }
+  }
+
+  test("Create parquet table with compression") {
+    Seq(true, false).foreach { isPartitioned =>
+      Seq("UNCOMPRESSED", "SNAPPY",
"GZIP").foreach { compressionCodec => + checkCompressionCodec(compressionCodec, isPartitioned) + } + } + } + + test("Create table with unknown compression") { + Seq(true, false).foreach { isPartitioned => + val exception = intercept[IllegalArgumentException] { + checkCompressionCodec("aa", isPartitioned) + } + assert(exception.getMessage.contains("Codec [aa] is not available")) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala index 9dc56292c3720..24044e66e8468 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala @@ -19,7 +19,16 @@ package org.apache.spark.sql.execution.datasources.parquet import java.io.File +import org.apache.commons.io.FileUtils +import org.apache.hadoop.fs.{FileSystem, Path, PathFilter} +import org.apache.parquet.CorruptStatistics +import org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER +import org.apache.parquet.hadoop.ParquetFileReader +import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName + import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext class ParquetInteroperabilitySuite extends ParquetCompatibilityTest with SharedSQLContext { @@ -87,4 +96,118 @@ class ParquetInteroperabilitySuite extends ParquetCompatibilityTest with SharedS Row(Seq(2, 3)))) } } + + test("parquet timestamp conversion") { + // Make a table with one parquet file written by impala, and one parquet file written by spark. + // We should only adjust the timestamps in the impala file, and only if the conf is set + val impalaFile = "test-data/impala_timestamp.parq" + + // here are the timestamps in the impala file, as they were saved by impala + val impalaFileData = + Seq( + "2001-01-01 01:01:01", + "2002-02-02 02:02:02", + "2003-03-03 03:03:03" + ).map(java.sql.Timestamp.valueOf) + val impalaPath = Thread.currentThread().getContextClassLoader.getResource(impalaFile) + .toURI.getPath + withTempPath { tableDir => + val ts = Seq( + "2004-04-04 04:04:04", + "2005-05-05 05:05:05", + "2006-06-06 06:06:06" + ).map { s => java.sql.Timestamp.valueOf(s) } + import testImplicits._ + withSQLConf( + (SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key, + SQLConf.ParquetOutputTimestampType.INT96.toString)) { + // match the column names of the file from impala + val df = spark.createDataset(ts).toDF().repartition(1).withColumnRenamed("value", "ts") + df.write.parquet(tableDir.getAbsolutePath) + } + FileUtils.copyFile(new File(impalaPath), new File(tableDir, "part-00001.parq")) + + Seq(false, true).foreach { int96TimestampConversion => + Seq(false, true).foreach { vectorized => + withSQLConf( + (SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key, + SQLConf.ParquetOutputTimestampType.INT96.toString), + (SQLConf.PARQUET_INT96_TIMESTAMP_CONVERSION.key, int96TimestampConversion.toString()), + (SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key, vectorized.toString()) + ) { + val readBack = spark.read.parquet(tableDir.getAbsolutePath).collect() + assert(readBack.size === 6) + // if we apply the conversion, we'll get the "right" values, as saved by impala in the + // original file. 
Otherwise, they're off by the local timezone offset, set to + // America/Los_Angeles in tests + val impalaExpectations = if (int96TimestampConversion) { + impalaFileData + } else { + impalaFileData.map { ts => + DateTimeUtils.toJavaTimestamp(DateTimeUtils.convertTz( + DateTimeUtils.fromJavaTimestamp(ts), + DateTimeUtils.TimeZoneUTC, + DateTimeUtils.getTimeZone(conf.sessionLocalTimeZone))) + } + } + val fullExpectations = (ts ++ impalaExpectations).map(_.toString).sorted.toArray + val actual = readBack.map(_.getTimestamp(0).toString).sorted + withClue( + s"int96TimestampConversion = $int96TimestampConversion; vectorized = $vectorized") { + assert(fullExpectations === actual) + + // Now test that the behavior is still correct even with a filter which could get + // pushed down into parquet. We don't need extra handling for pushed down + // predicates because (a) in ParquetFilters, we ignore TimestampType and (b) parquet + // does not read statistics from int96 fields, as they are unsigned. See + // scalastyle:off line.size.limit + // https://github.com/apache/parquet-mr/blob/2fd62ee4d524c270764e9b91dca72e5cf1a005b7/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java#L419 + // https://github.com/apache/parquet-mr/blob/2fd62ee4d524c270764e9b91dca72e5cf1a005b7/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java#L348 + // scalastyle:on line.size.limit + // + // Just to be defensive in case anything ever changes in parquet, this test checks + // the assumption on column stats, and also the end-to-end behavior. + + val hadoopConf = sparkContext.hadoopConfiguration + val fs = FileSystem.get(hadoopConf) + val parts = fs.listStatus(new Path(tableDir.getAbsolutePath), new PathFilter { + override def accept(path: Path): Boolean = !path.getName.startsWith("_") + }) + // grab the meta data from the parquet file. The next section of asserts just make + // sure the test is configured correctly. + assert(parts.size == 2) + parts.foreach { part => + val oneFooter = + ParquetFileReader.readFooter(hadoopConf, part.getPath, NO_FILTER) + assert(oneFooter.getFileMetaData.getSchema.getColumns.size === 1) + assert(oneFooter.getFileMetaData.getSchema.getColumns.get(0).getType() === + PrimitiveTypeName.INT96) + val oneBlockMeta = oneFooter.getBlocks().get(0) + val oneBlockColumnMeta = oneBlockMeta.getColumns().get(0) + val columnStats = oneBlockColumnMeta.getStatistics + // This is the important assert. Column stats are written, but they are ignored + // when the data is read back as mentioned above, b/c int96 is unsigned. This + // assert makes sure this holds even if we change parquet versions (if eg. there + // were ever statistics even on unsigned columns). + + // Note: This is not true in palantir/parquet-mr and statistics are always returned + // and they are always unsigned. + assert(!(oneFooter.getFileMetaData.getCreatedBy.contains("impala") ^ + columnStats.isEmpty)) + } + + // These queries should return the entire dataset with the conversion applied, + // but if the predicates were applied to the raw values in parquet, they would + // incorrectly filter data out. 
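+          // (A minimal, hypothetical sketch of the conf under test, assuming an active
+          // SparkSession `spark` and a table directory written by Impala:
+          //   spark.conf.set("spark.sql.parquet.int96TimestampConversion", "true")
+          //   spark.read.parquet("/path/to/impala_table").show()
+          // With the flag enabled, INT96 timestamps in files whose metadata says they
+          // were written by Impala are adjusted so they read back as the values the
+          // writer originally stored.)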
+ val query = spark.read.parquet(tableDir.getAbsolutePath) + .where("ts > '2001-01-01 01:00:00'") + val countWithFilter = query.count() + val exp = if (int96TimestampConversion) 6 else 5 + assert(countWithFilter === exp, query) + } + } + } + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index a48312f546e3d..9207a8067387a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -322,14 +322,27 @@ class ParquetQuerySuite extends QueryTest with ParquetTest with SharedSQLContext new Path(basePath, "first").toString, new Path(basePath, "second").toString, new Path(basePath, "third").toString) - checkAnswer( - df, - Seq(Row(0), Row(1))) + checkAnswer(df, Seq(Row(0), Row(1))) + } + } + + def testIgnoreCorruptFilesWithoutSchemaInfer(): Unit = { + withTempDir { dir => + val basePath = dir.getCanonicalPath + spark.range(1).toDF("a").write.parquet(new Path(basePath, "first").toString) + spark.range(1, 2).toDF("a").write.parquet(new Path(basePath, "second").toString) + spark.range(2, 3).toDF("a").write.json(new Path(basePath, "third").toString) + val df = spark.read.schema("a long").parquet( + new Path(basePath, "first").toString, + new Path(basePath, "second").toString, + new Path(basePath, "third").toString) + checkAnswer(df, Seq(Row(0), Row(1))) } } withSQLConf(SQLConf.IGNORE_CORRUPT_FILES.key -> "true") { testIgnoreCorruptFiles() + testIgnoreCorruptFilesWithoutSchemaInfer() } withSQLConf(SQLConf.IGNORE_CORRUPT_FILES.key -> "false") { @@ -337,6 +350,10 @@ class ParquetQuerySuite extends QueryTest with ParquetTest with SharedSQLContext testIgnoreCorruptFiles() } assert(exception.getMessage().contains("is not a Parquet file")) + val exception2 = intercept[SparkException] { + testIgnoreCorruptFilesWithoutSchemaInfer() + } + assert(exception2.getMessage().contains("is not a Parquet file")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala index cb7393cdd2b9d..33287044f279e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala @@ -185,10 +185,9 @@ class TextSuite extends QueryTest with SharedSQLContext { val data = df.collect() assert(data(0) == Row("This is a test file for the text data source")) assert(data(1) == Row("1+1")) - // non ascii characters are not allowed in the code, so we disable the scalastyle here. 
- // scalastyle:off + // scalastyle:off nonascii assert(data(2) == Row("数据砖头")) - // scalastyle:on + // scalastyle:on nonascii assert(data(3) == Row("\"doh\"")) assert(data.length == 4) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/WholeTextFileSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/WholeTextFileSuite.scala new file mode 100644 index 0000000000000..8bd736bee69de --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/WholeTextFileSuite.scala @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.text + +import java.io.File + +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSQLContext +import org.apache.spark.sql.types.{StringType, StructType} + +class WholeTextFileSuite extends QueryTest with SharedSQLContext { + + // Hadoop's FileSystem caching does not use the Configuration as part of its cache key, which + // can cause Filesystem.get(Configuration) to return a cached instance created with a different + // configuration than the one passed to get() (see HADOOP-8490 for more details). This caused + // hard-to-reproduce test failures, since any suites that were run after this one would inherit + // the new value of "fs.local.block.size" (see SPARK-5227 and SPARK-5679). To work around this, + // we disable FileSystem caching in this suite. + protected override def sparkConf = + super.sparkConf.set("spark.hadoop.fs.file.impl.disable.cache", "true") + + private def testFile: String = { + Thread.currentThread().getContextClassLoader.getResource("test-data/text-suite.txt").toString + } + + test("reading text file with option wholetext=true") { + val df = spark.read.option("wholetext", "true") + .format("text").load(testFile) + // schema + assert(df.schema == new StructType().add("value", StringType)) + + // verify content + val data = df.collect() + assert(data(0) == + Row( + // scalastyle:off nonascii + """This is a test file for the text data source + |1+1 + |数据砖头 + |"doh" + |""".stripMargin)) + // scalastyle:on nonascii + assert(data.length == 1) + } + + test("correctness of wholetext option") { + import org.apache.spark.sql.catalyst.util._ + withTempDir { dir => + val file1 = new File(dir, "text1.txt") + stringToFile(file1, + """text file 1 contents. + |From: None to: ?? 
+        """.stripMargin)
+      val file2 = new File(dir, "text2.txt")
+      stringToFile(file2, "text file 2 contents.")
+      val file3 = new File(dir, "text3.txt")
+      stringToFile(file3, "text file 3 contents.")
+      val df = spark.read.option("wholetext", "true").text(dir.getAbsolutePath)
+      // Since the wholetext option reads each file into a single row, the number of rows
+      // should equal the number of files.
+      val data = df.sort("value").collect()
+      assert(data.length == 3)
+      // Each file should be represented as a single Row/element in the DataFrame/Dataset.
+      assert(data(0) == Row(
+        """text file 1 contents.
+          |From: None to: ??
+        """.stripMargin))
+      assert(data(1) == Row(
+        """text file 2 contents.""".stripMargin))
+      assert(data(2) == Row(
+        """text file 3 contents.""".stripMargin))
+    }
+  }
+
+
+  test("Correctness of wholetext option with gzip compression mode.") {
+    withTempDir { dir =>
+      val path = dir.getCanonicalPath
+      val df1 = spark.range(0, 1000).selectExpr("CAST(id AS STRING) AS s").repartition(1)
+      df1.write.option("compression", "gzip").mode("overwrite").text(path)
+      // When reading in wholetext mode, each file is read as a single row, i.e. not
+      // split on newline characters.
+      val expected = Row(Range(0, 1000).mkString("", "\n", "\n"))
+      Seq(10, 100, 1000).foreach { bytes =>
+        withSQLConf(SQLConf.FILES_MAX_PARTITION_BYTES.key -> bytes.toString) {
+          val df2 = spark.read.option("wholetext", "true").format("text").load(path)
+          val result = df2.collect().head
+          assert(result === expected)
+        }
+      }
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala
index a0fad862b44c7..0bcd54e1fceab 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala
@@ -22,7 +22,7 @@ import scala.reflect.ClassTag
 import org.apache.spark.AccumulatorSuite
 import org.apache.spark.sql.{Dataset, QueryTest, Row, SparkSession}
 import org.apache.spark.sql.catalyst.expressions.{BitwiseAnd, BitwiseOr, Cast, Literal, ShiftLeft}
-import org.apache.spark.sql.execution.SparkPlan
+import org.apache.spark.sql.execution.{BinaryExecNode, SparkPlan, WholeStageCodegenExec}
 import org.apache.spark.sql.execution.exchange.EnsureRequirements
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.internal.SQLConf
@@ -223,4 +223,114 @@ class BroadcastJoinSuite extends QueryTest with SQLTestUtils {
     assert(HashJoin.rewriteKeyExpr(l :: ss :: Nil) === l :: ss :: Nil)
     assert(HashJoin.rewriteKeyExpr(i :: ss :: Nil) === i :: ss :: Nil)
   }
+
+  test("Shouldn't change broadcast join buildSide if user clearly specified") {
+
+    withTempView("t1", "t2") {
+      spark.createDataFrame(Seq((1, "4"), (2, "2"))).toDF("key", "value").createTempView("t1")
+      spark.createDataFrame(Seq((1, "1"), (2, "12.3"), (2, "123"))).toDF("key", "value")
+        .createTempView("t2")
+
+      val t1Size = spark.table("t1").queryExecution.analyzed.children.head.stats.sizeInBytes
+      val t2Size = spark.table("t2").queryExecution.analyzed.children.head.stats.sizeInBytes
+      assert(t1Size < t2Size)
+
+      // INNER JOIN && t1Size < t2Size => BuildLeft
+      assertJoinBuildSide(
+        "SELECT /*+ MAPJOIN(t1, t2) */ * FROM t1 JOIN t2 ON t1.key = t2.key", bh, BuildLeft)
+      // LEFT JOIN => BuildRight
+      assertJoinBuildSide(
+        "SELECT /*+ MAPJOIN(t1, t2) */ * FROM t1 LEFT JOIN t2 ON t1.key = t2.key", bh, BuildRight)
+      // RIGHT JOIN => BuildLeft
+
assertJoinBuildSide( + "SELECT /*+ MAPJOIN(t1, t2) */ * FROM t1 RIGHT JOIN t2 ON t1.key = t2.key", bh, BuildLeft) + // INNER JOIN && broadcast(t1) => BuildLeft + assertJoinBuildSide( + "SELECT /*+ MAPJOIN(t1) */ * FROM t1 JOIN t2 ON t1.key = t2.key", bh, BuildLeft) + // INNER JOIN && broadcast(t2) => BuildRight + assertJoinBuildSide( + "SELECT /*+ MAPJOIN(t2) */ * FROM t1 JOIN t2 ON t1.key = t2.key", bh, BuildRight) + + + withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") { + // INNER JOIN && t1Size < t2Size => BuildLeft + assertJoinBuildSide("SELECT /*+ MAPJOIN(t1, t2) */ * FROM t1 JOIN t2", bl, BuildLeft) + // FULL JOIN && t1Size < t2Size => BuildLeft + assertJoinBuildSide("SELECT /*+ MAPJOIN(t1, t2) */ * FROM t1 FULL JOIN t2", bl, BuildLeft) + // LEFT JOIN => BuildRight + assertJoinBuildSide("SELECT /*+ MAPJOIN(t1, t2) */ * FROM t1 LEFT JOIN t2", bl, BuildRight) + // RIGHT JOIN => BuildLeft + assertJoinBuildSide("SELECT /*+ MAPJOIN(t1, t2) */ * FROM t1 RIGHT JOIN t2", bl, BuildLeft) + // INNER JOIN && broadcast(t1) => BuildLeft + assertJoinBuildSide("SELECT /*+ MAPJOIN(t1) */ * FROM t1 JOIN t2", bl, BuildLeft) + // INNER JOIN && broadcast(t2) => BuildRight + assertJoinBuildSide("SELECT /*+ MAPJOIN(t2) */ * FROM t1 JOIN t2", bl, BuildRight) + // FULL OUTER && broadcast(t1) => BuildLeft + assertJoinBuildSide("SELECT /*+ MAPJOIN(t1) */ * FROM t1 FULL OUTER JOIN t2", bl, BuildLeft) + // FULL OUTER && broadcast(t2) => BuildRight + assertJoinBuildSide( + "SELECT /*+ MAPJOIN(t2) */ * FROM t1 FULL OUTER JOIN t2", bl, BuildRight) + // FULL OUTER && t1Size < t2Size => BuildLeft + assertJoinBuildSide("SELECT * FROM t1 FULL OUTER JOIN t2", bl, BuildLeft) + } + } + } + + test("Shouldn't bias towards build right if user didn't specify") { + + withTempView("t1", "t2") { + spark.createDataFrame(Seq((1, "4"), (2, "2"))).toDF("key", "value").createTempView("t1") + spark.createDataFrame(Seq((1, "1"), (2, "12.3"), (2, "123"))).toDF("key", "value") + .createTempView("t2") + + val t1Size = spark.table("t1").queryExecution.analyzed.children.head.stats.sizeInBytes + val t2Size = spark.table("t2").queryExecution.analyzed.children.head.stats.sizeInBytes + assert(t1Size < t2Size) + + assertJoinBuildSide("SELECT * FROM t1 JOIN t2 ON t1.key = t2.key", bh, BuildLeft) + assertJoinBuildSide("SELECT * FROM t2 JOIN t1 ON t1.key = t2.key", bh, BuildRight) + + assertJoinBuildSide("SELECT * FROM t1 LEFT JOIN t2 ON t1.key = t2.key", bh, BuildRight) + assertJoinBuildSide("SELECT * FROM t2 LEFT JOIN t1 ON t1.key = t2.key", bh, BuildRight) + + assertJoinBuildSide("SELECT * FROM t1 RIGHT JOIN t2 ON t1.key = t2.key", bh, BuildLeft) + assertJoinBuildSide("SELECT * FROM t2 RIGHT JOIN t1 ON t1.key = t2.key", bh, BuildLeft) + + withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") { + assertJoinBuildSide("SELECT * FROM t1 FULL OUTER JOIN t2", bl, BuildLeft) + assertJoinBuildSide("SELECT * FROM t2 FULL OUTER JOIN t1", bl, BuildRight) + + assertJoinBuildSide("SELECT * FROM t1 LEFT JOIN t2", bl, BuildRight) + assertJoinBuildSide("SELECT * FROM t2 LEFT JOIN t1", bl, BuildRight) + + assertJoinBuildSide("SELECT * FROM t1 RIGHT JOIN t2", bl, BuildLeft) + assertJoinBuildSide("SELECT * FROM t2 RIGHT JOIN t1", bl, BuildLeft) + } + } + } + + private val bh = BroadcastHashJoinExec.toString + private val bl = BroadcastNestedLoopJoinExec.toString + + private def assertJoinBuildSide(sqlStr: String, joinMethod: String, buildSide: BuildSide): Any = { + val executedPlan = sql(sqlStr).queryExecution.executedPlan + executedPlan match { + case b: 
BroadcastNestedLoopJoinExec => + assert(b.getClass.getSimpleName === joinMethod) + assert(b.buildSide === buildSide) + case b: BroadcastHashJoinExec => + assert(b.getClass.getSimpleName === joinMethod) + assert(b.buildSide === buildSide) + case w: WholeStageCodegenExec => + assert(w.children.head.getClass.getSimpleName === joinMethod) + if (w.children.head.isInstanceOf[BroadcastNestedLoopJoinExec]) { + assert( + w.children.head.asInstanceOf[BroadcastNestedLoopJoinExec].buildSide === buildSide) + } else if (w.children.head.isInstanceOf[BroadcastHashJoinExec]) { + assert(w.children.head.asInstanceOf[BroadcastHashJoinExec].buildSide === buildSide) + } else { + fail() + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index d588af3e19dde..a3a3f3851e21c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -33,14 +33,6 @@ import org.apache.spark.util.{AccumulatorContext, JsonProtocol} class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with SharedSQLContext { import testImplicits._ - private def statusStore: SQLAppStatusStore = { - new SQLAppStatusStore(sparkContext.statusStore.store) - } - - private def currentExecutionIds(): Set[Long] = { - statusStore.executionsList.map(_.executionId).toSet - } - /** * Generates a `DataFrame` by filling randomly generated bytes for hash collision. */ @@ -486,15 +478,22 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared spark.range(10).write.parquet(dir) spark.read.parquet(dir).createOrReplaceTempView("pqS") + // The executed plan looks like: + // Exchange RoundRobinPartitioning(2) + // +- BroadcastNestedLoopJoin BuildLeft, Cross + // :- BroadcastExchange IdentityBroadcastMode + // : +- Exchange RoundRobinPartitioning(3) + // : +- *Range (0, 30, step=1, splits=2) + // +- *FileScan parquet [id#465L] Batched: true, Format: Parquet, Location: ...(ignored) val res3 = InputOutputMetricsHelper.run( spark.range(30).repartition(3).crossJoin(sql("select * from pqS")).repartition(2).toDF() ) // The query above is executed in the following stages: - // 1. sql("select * from pqS") => (10, 0, 10) - // 2. range(30) => (30, 0, 30) - // 3. crossJoin(...) of 1. and 2. => (0, 30, 300) + // 1. range(30) => (30, 0, 30) + // 2. sql("select * from pqS") => (0, 30, 0) + // 3. crossJoin(...) of 1. and 2. => (10, 0, 300) // 4. 
shuffle & return results => (0, 300, 0) - assert(res3 === (10L, 0L, 10L) :: (30L, 0L, 30L) :: (0L, 30L, 300L) :: (0L, 300L, 0L) :: Nil) + assert(res3 === (30L, 0L, 30L) :: (0L, 30L, 0L) :: (10L, 0L, 300L) :: (0L, 300L, 0L) :: Nil) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala index d89c4b14619fa..122d28798136f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala @@ -31,17 +31,14 @@ import org.apache.spark.util.Utils trait SQLMetricsTestUtils extends SQLTestUtils { - import testImplicits._ - private def statusStore: SQLAppStatusStore = { - new SQLAppStatusStore(sparkContext.statusStore.store) - } - - private def currentExecutionIds(): Set[Long] = { + protected def currentExecutionIds(): Set[Long] = { statusStore.executionsList.map(_.executionId).toSet } + protected def statusStore: SQLAppStatusStore = spark.sharedState.statusStore + /** * Get execution metrics for the SQL execution and verify metrics values. * @@ -57,7 +54,6 @@ trait SQLMetricsTestUtils extends SQLTestUtils { assert(executionIds.size == 1) val executionId = executionIds.head - val executionData = statusStore.execution(executionId).get val executedNode = statusStore.planGraph(executionId).nodes.head val metricsNames = Seq( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala index 7f66c64ed2aa7..12e93cbc8c77f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala @@ -75,13 +75,17 @@ class BatchEvalPythonExecSuite extends SparkPlanTest with SharedSQLContext { assert(qualifiedPlanNodes.size == 2) } - test("Python UDF: no push down on predicates starting from the first non-deterministic") { + test("Python UDF: push down on deterministic predicates after the first non-deterministic") { val df = Seq(("Hello", 4)).toDF("a", "b") .where("dummyPythonUDF(a) and rand() > 0.3 and b > 4") + val qualifiedPlanNodes = df.queryExecution.executedPlan.collect { - case f @ FilterExec(And(_: And, _: GreaterThan), InputAdapter(_: BatchEvalPythonExec)) => f + case f @ FilterExec( + And(_: AttributeReference, _: GreaterThan), + InputAdapter(_: BatchEvalPythonExec)) => f + case b @ BatchEvalPythonExec(_, _, WholeStageCodegenExec(_: FilterExec)) => b } - assert(qualifiedPlanNodes.size == 1) + assert(qualifiedPlanNodes.size == 2) } test("Python UDF refers to the attributes from more than one child") { @@ -110,4 +114,5 @@ class MyDummyPythonUDF extends UserDefinedPythonFunction( name = "dummyUDF", func = new DummyUDF, dataType = BooleanType, - pythonEvalType = PythonEvalType.SQL_BATCHED_UDF) + pythonEvalType = PythonEvalType.SQL_BATCHED_UDF, + udfDeterministic = true) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala index 83018f95aa55d..12eaf63415081 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala
@@ -92,7 +92,7 @@ class CompactibleFileStreamLogSuite extends SparkFunSuite with SharedSQLContext

   test("deriveCompactInterval") {
     // latestCompactBatchId(4) + 1 <= default(5)
-    // then use latestestCompactBatchId + 1 === 5
+    // then use latestCompactBatchId + 1 === 5
     assert(5 === deriveCompactInterval(5, 4))
     // First divisor of 10 greater than 4 === 5
     assert(5 === deriveCompactInterval(4, 9))
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MemorySinkV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MemorySinkV2Suite.scala
new file mode 100644
index 0000000000000..00d4f0b8503d8
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MemorySinkV2Suite.scala
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.streaming
+
+import org.scalatest.BeforeAndAfter
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.execution.streaming.sources._
+import org.apache.spark.sql.streaming.{OutputMode, StreamTest}
+
+class MemorySinkV2Suite extends StreamTest with BeforeAndAfter {
+  test("data writer") {
+    val partition = 1234
+    val writer = new MemoryDataWriter(partition, OutputMode.Append())
+    writer.write(Row(1))
+    writer.write(Row(2))
+    writer.write(Row(44))
+    val msg = writer.commit()
+    assert(msg.data.map(_.getInt(0)) == Seq(1, 2, 44))
+    assert(msg.partition == partition)
+
+    // The buffer should be cleared on commit, so a repeated commit should return no data.
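+    // (A hedged sketch of the same contract in isolation: MemoryDataWriter buffers rows
+    // per partition and hands them off in a MemoryWriterCommitMessage on commit(), so
+    //   val w = new MemoryDataWriter(0, OutputMode.Append())
+    //   w.write(Row(7))
+    //   w.commit().data          // Seq(Row(7))
+    //   w.commit().data.isEmpty  // true, the buffer was flushed by the first commit
+    // which is exactly what the assertion below exercises.)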
+ assert(writer.commit().data.isEmpty) + } + + test("continuous writer") { + val sink = new MemorySinkV2 + val writer = new ContinuousMemoryWriter(sink, OutputMode.Append()) + writer.commit(0, + Array( + MemoryWriterCommitMessage(0, Seq(Row(1), Row(2))), + MemoryWriterCommitMessage(1, Seq(Row(3), Row(4))), + MemoryWriterCommitMessage(2, Seq(Row(6), Row(7))) + )) + assert(sink.latestBatchId.contains(0)) + assert(sink.latestBatchData.map(_.getInt(0)).sorted == Seq(1, 2, 3, 4, 6, 7)) + writer.commit(19, + Array( + MemoryWriterCommitMessage(3, Seq(Row(11), Row(22))), + MemoryWriterCommitMessage(0, Seq(Row(33))) + )) + assert(sink.latestBatchId.contains(19)) + assert(sink.latestBatchData.map(_.getInt(0)).sorted == Seq(11, 22, 33)) + + assert(sink.allData.map(_.getInt(0)).sorted == Seq(1, 2, 3, 4, 6, 7, 11, 22, 33)) + } + + test("microbatch writer") { + val sink = new MemorySinkV2 + new MemoryWriter(sink, 0, OutputMode.Append()).commit( + Array( + MemoryWriterCommitMessage(0, Seq(Row(1), Row(2))), + MemoryWriterCommitMessage(1, Seq(Row(3), Row(4))), + MemoryWriterCommitMessage(2, Seq(Row(6), Row(7))) + )) + assert(sink.latestBatchId.contains(0)) + assert(sink.latestBatchData.map(_.getInt(0)).sorted == Seq(1, 2, 3, 4, 6, 7)) + new MemoryWriter(sink, 19, OutputMode.Append()).commit( + Array( + MemoryWriterCommitMessage(3, Seq(Row(11), Row(22))), + MemoryWriterCommitMessage(0, Seq(Row(33))) + )) + assert(sink.latestBatchId.contains(19)) + assert(sink.latestBatchData.map(_.getInt(0)).sorted == Seq(11, 22, 33)) + + assert(sink.allData.map(_.getInt(0)).sorted == Seq(1, 2, 3, 4, 6, 7, 11, 22, 33)) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/RateSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/RateSourceV2Suite.scala new file mode 100644 index 0000000000000..85085d43061bd --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/RateSourceV2Suite.scala @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.streaming + +import java.util.Optional +import java.util.concurrent.TimeUnit + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.Row +import org.apache.spark.sql.execution.datasources.DataSource +import org.apache.spark.sql.execution.streaming.continuous._ +import org.apache.spark.sql.execution.streaming.sources.{RateStreamBatchTask, RateStreamMicroBatchReader, RateStreamSourceV2} +import org.apache.spark.sql.sources.v2.DataSourceV2Options +import org.apache.spark.sql.sources.v2.streaming.{ContinuousReadSupport, MicroBatchReadSupport} +import org.apache.spark.sql.streaming.StreamTest +import org.apache.spark.util.ManualClock + +class RateSourceV2Suite extends StreamTest { + import testImplicits._ + + case class AdvanceRateManualClock(seconds: Long) extends AddData { + override def addData(query: Option[StreamExecution]): (BaseStreamingSource, Offset) = { + assert(query.nonEmpty) + val rateSource = query.get.logicalPlan.collect { + case StreamingExecutionRelation(source: RateStreamMicroBatchReader, _) => source + }.head + rateSource.clock.asInstanceOf[ManualClock].advance(TimeUnit.SECONDS.toMillis(seconds)) + rateSource.setOffsetRange(Optional.empty(), Optional.empty()) + (rateSource, rateSource.getEndOffset()) + } + } + + test("microbatch in registry") { + DataSource.lookupDataSource("ratev2", spark.sqlContext.conf).newInstance() match { + case ds: MicroBatchReadSupport => + val reader = ds.createMicroBatchReader(Optional.empty(), "", DataSourceV2Options.empty()) + assert(reader.isInstanceOf[RateStreamMicroBatchReader]) + case _ => + throw new IllegalStateException("Could not find v2 read support for rate") + } + } + + test("basic microbatch execution") { + val input = spark.readStream + .format("rateV2") + .option("numPartitions", "1") + .option("rowsPerSecond", "10") + .option("useManualClock", "true") + .load() + testStream(input, useV2Sink = true)( + AdvanceRateManualClock(seconds = 1), + CheckLastBatch((0 until 10).map(v => new java.sql.Timestamp(v * 100L) -> v): _*), + StopStream, + StartStream(), + // Advance 2 seconds because creating a new RateSource will also create a new ManualClock + AdvanceRateManualClock(seconds = 2), + CheckLastBatch((10 until 20).map(v => new java.sql.Timestamp(v * 100L) -> v): _*) + ) + } + + test("microbatch - numPartitions propagated") { + val reader = new RateStreamMicroBatchReader( + new DataSourceV2Options(Map("numPartitions" -> "11", "rowsPerSecond" -> "33").asJava)) + reader.setOffsetRange(Optional.empty(), Optional.empty()) + val tasks = reader.createReadTasks() + assert(tasks.size == 11) + } + + test("microbatch - set offset") { + val reader = new RateStreamMicroBatchReader(DataSourceV2Options.empty()) + val startOffset = RateStreamOffset(Map((0, ValueRunTimeMsPair(0, 1000)))) + val endOffset = RateStreamOffset(Map((0, ValueRunTimeMsPair(0, 2000)))) + reader.setOffsetRange(Optional.of(startOffset), Optional.of(endOffset)) + assert(reader.getStartOffset() == startOffset) + assert(reader.getEndOffset() == endOffset) + } + + test("microbatch - infer offsets") { + val reader = new RateStreamMicroBatchReader( + new DataSourceV2Options(Map("numPartitions" -> "1", "rowsPerSecond" -> "100").asJava)) + reader.clock.waitTillTime(reader.clock.getTimeMillis() + 100) + reader.setOffsetRange(Optional.empty(), Optional.empty()) + reader.getStartOffset() match { + case r: RateStreamOffset => + assert(r.partitionToValueAndRunTimeMs(0).runTimeMs == reader.creationTimeMs) + case _ => throw new 
IllegalStateException("unexpected offset type") + } + reader.getEndOffset() match { + case r: RateStreamOffset => + // End offset may be a bit beyond 100 ms/9 rows after creation if the wait lasted + // longer than 100ms. It should never be early. + assert(r.partitionToValueAndRunTimeMs(0).value >= 9) + assert(r.partitionToValueAndRunTimeMs(0).runTimeMs >= reader.creationTimeMs + 100) + + case _ => throw new IllegalStateException("unexpected offset type") + } + } + + test("microbatch - predetermined batch size") { + val reader = new RateStreamMicroBatchReader( + new DataSourceV2Options(Map("numPartitions" -> "1", "rowsPerSecond" -> "20").asJava)) + val startOffset = RateStreamOffset(Map((0, ValueRunTimeMsPair(0, 1000)))) + val endOffset = RateStreamOffset(Map((0, ValueRunTimeMsPair(20, 2000)))) + reader.setOffsetRange(Optional.of(startOffset), Optional.of(endOffset)) + val tasks = reader.createReadTasks() + assert(tasks.size == 1) + assert(tasks.get(0).asInstanceOf[RateStreamBatchTask].vals.size == 20) + } + + test("microbatch - data read") { + val reader = new RateStreamMicroBatchReader( + new DataSourceV2Options(Map("numPartitions" -> "11", "rowsPerSecond" -> "33").asJava)) + val startOffset = RateStreamSourceV2.createInitialOffset(11, reader.creationTimeMs) + val endOffset = RateStreamOffset(startOffset.partitionToValueAndRunTimeMs.toSeq.map { + case (part, ValueRunTimeMsPair(currentVal, currentReadTime)) => + (part, ValueRunTimeMsPair(currentVal + 33, currentReadTime + 1000)) + }.toMap) + + reader.setOffsetRange(Optional.of(startOffset), Optional.of(endOffset)) + val tasks = reader.createReadTasks() + assert(tasks.size == 11) + + val readData = tasks.asScala + .map(_.createDataReader()) + .flatMap { reader => + val buf = scala.collection.mutable.ListBuffer[Row]() + while (reader.next()) buf.append(reader.get()) + buf + } + + assert(readData.map(_.getLong(1)).sorted == Range(0, 33)) + } + + test("continuous in registry") { + DataSource.lookupDataSource("rate", spark.sqlContext.conf).newInstance() match { + case ds: ContinuousReadSupport => + val reader = ds.createContinuousReader(Optional.empty(), "", DataSourceV2Options.empty()) + assert(reader.isInstanceOf[RateStreamContinuousReader]) + case _ => + throw new IllegalStateException("Could not find v2 read support for rate") + } + } + + test("continuous data") { + val reader = new RateStreamContinuousReader( + new DataSourceV2Options(Map("numPartitions" -> "2", "rowsPerSecond" -> "20").asJava)) + reader.setOffset(Optional.empty()) + val tasks = reader.createReadTasks() + assert(tasks.size == 2) + + val data = scala.collection.mutable.ListBuffer[Row]() + tasks.asScala.foreach { + case t: RateStreamContinuousReadTask => + val startTimeMs = reader.getStartOffset() + .asInstanceOf[RateStreamOffset] + .partitionToValueAndRunTimeMs(t.partitionIndex) + .runTimeMs + val r = t.createDataReader().asInstanceOf[RateStreamContinuousDataReader] + for (rowIndex <- 0 to 9) { + r.next() + data.append(r.get()) + assert(r.getOffset() == + RateStreamPartitionOffset( + t.partitionIndex, + t.partitionIndex + rowIndex * 2, + startTimeMs + (rowIndex + 1) * 100)) + } + assert(System.currentTimeMillis() >= startTimeMs + 1000) + + case _ => throw new IllegalStateException("Unexpected task type") + } + + assert(data.map(_.getLong(1)).toSeq.sorted == Range(0, 20)) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala similarity 
index 63% rename from sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLListenerSuite.scala rename to sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala index eba8d55daad58..7d84f45d36bee 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLListenerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala @@ -36,14 +36,18 @@ import org.apache.spark.sql.catalyst.util.quietly import org.apache.spark.sql.execution.{LeafExecNode, QueryExecution, SparkPlanInfo, SQLExecution} import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} import org.apache.spark.sql.test.SharedSQLContext +import org.apache.spark.status.ElementTrackingStore import org.apache.spark.status.config._ import org.apache.spark.util.{AccumulatorMetadata, JsonProtocol, LongAccumulator} import org.apache.spark.util.kvstore.InMemoryStore -class SQLListenerSuite extends SparkFunSuite with SharedSQLContext with JsonTestUtils { + +class SQLAppStatusListenerSuite extends SparkFunSuite with SharedSQLContext with JsonTestUtils { import testImplicits._ - override protected def sparkConf = super.sparkConf.set(LIVE_ENTITY_UPDATE_PERIOD, 0L) + override protected def sparkConf = { + super.sparkConf.set(LIVE_ENTITY_UPDATE_PERIOD, 0L).set(ASYNC_TRACKING_ENABLED, false) + } private def createTestDataFrame: DataFrame = { Seq( @@ -58,21 +62,21 @@ class SQLListenerSuite extends SparkFunSuite with SharedSQLContext with JsonTest properties } - private def createStageInfo(stageId: Int, attemptId: Int): StageInfo = new StageInfo( - stageId = stageId, - attemptId = attemptId, - // The following fields are not used in tests - name = "", - numTasks = 0, - rddInfos = Nil, - parentIds = Nil, - details = "" - ) + private def createStageInfo(stageId: Int, attemptId: Int): StageInfo = { + new StageInfo(stageId = stageId, + attemptId = attemptId, + // The following fields are not used in tests + name = "", + numTasks = 0, + rddInfos = Nil, + parentIds = Nil, + details = "") + } private def createTaskInfo( taskId: Int, attemptNumber: Int, - accums: Map[Long, Long] = Map()): TaskInfo = { + accums: Map[Long, Long] = Map.empty): TaskInfo = { val info = new TaskInfo( taskId = taskId, attemptNumber = attemptNumber, @@ -96,27 +100,37 @@ class SQLListenerSuite extends SparkFunSuite with SharedSQLContext with JsonTest }.toSeq } - /** Return the shared SQL store from the active SparkSession. */ - private def statusStore: SQLAppStatusStore = - new SQLAppStatusStore(spark.sparkContext.statusStore.store) - - /** - * Runs a test with a temporary SQLAppStatusStore tied to a listener bus. Events can be sent to - * the listener bus to update the store, and all data will be cleaned up at the end of the test. 
- */ - private def sqlStoreTest(name: String) - (fn: (SQLAppStatusStore, SparkListenerBus) => Unit): Unit = { - test(name) { - val store = new InMemoryStore() - val bus = new ReplayListenerBus() - val listener = new SQLAppStatusListener(sparkConf, store, true) - bus.addListener(listener) - val sqlStore = new SQLAppStatusStore(store, Some(listener)) - fn(sqlStore, bus) + private def assertJobs( + exec: Option[SQLExecutionUIData], + running: Seq[Int] = Nil, + completed: Seq[Int] = Nil, + failed: Seq[Int] = Nil): Unit = { + val actualRunning = new ListBuffer[Int]() + val actualCompleted = new ListBuffer[Int]() + val actualFailed = new ListBuffer[Int]() + + exec.get.jobs.foreach { case (jobId, jobStatus) => + jobStatus match { + case JobExecutionStatus.RUNNING => actualRunning += jobId + case JobExecutionStatus.SUCCEEDED => actualCompleted += jobId + case JobExecutionStatus.FAILED => actualFailed += jobId + case _ => fail(s"Unexpected status $jobStatus") + } } + + assert(actualRunning.sorted === running) + assert(actualCompleted.sorted === completed) + assert(actualFailed.sorted === failed) + } + + private def createStatusStore(): SQLAppStatusStore = { + val conf = sparkContext.conf + val store = new ElementTrackingStore(new InMemoryStore, conf) + val listener = new SQLAppStatusListener(conf, store, live = true) + new SQLAppStatusStore(store, Some(listener)) } - sqlStoreTest("basic") { (store, bus) => + test("basic") { def checkAnswer(actual: Map[Long, String], expected: Map[Long, Long]): Unit = { assert(actual.size == expected.size) expected.foreach { case (id, value) => @@ -130,6 +144,9 @@ class SQLListenerSuite extends SparkFunSuite with SharedSQLContext with JsonTest } } + val statusStore = createStatusStore() + val listener = statusStore.listener.get + val executionId = 0 val df = createTestDataFrame val accumulatorIds = @@ -142,7 +159,7 @@ class SQLListenerSuite extends SparkFunSuite with SharedSQLContext with JsonTest (id, accumulatorValue) }.toMap - bus.postToAll(SparkListenerSQLExecutionStart( + listener.onOtherEvent(SparkListenerSQLExecutionStart( executionId, "test", "test", @@ -150,7 +167,7 @@ class SQLListenerSuite extends SparkFunSuite with SharedSQLContext with JsonTest SparkPlanInfo.fromSparkPlan(df.queryExecution.executedPlan), System.currentTimeMillis())) - bus.postToAll(SparkListenerJobStart( + listener.onJobStart(SparkListenerJobStart( jobId = 0, time = System.currentTimeMillis(), stageInfos = Seq( @@ -158,45 +175,45 @@ class SQLListenerSuite extends SparkFunSuite with SharedSQLContext with JsonTest createStageInfo(1, 0) ), createProperties(executionId))) - bus.postToAll(SparkListenerStageSubmitted(createStageInfo(0, 0))) + listener.onStageSubmitted(SparkListenerStageSubmitted(createStageInfo(0, 0))) - assert(store.executionMetrics(0).isEmpty) + assert(statusStore.executionMetrics(executionId).isEmpty) - bus.postToAll(SparkListenerExecutorMetricsUpdate("", Seq( + listener.onExecutorMetricsUpdate(SparkListenerExecutorMetricsUpdate("", Seq( // (task id, stage id, stage attempt, accum updates) (0L, 0, 0, createAccumulatorInfos(accumulatorUpdates)), (1L, 0, 0, createAccumulatorInfos(accumulatorUpdates)) ))) - checkAnswer(store.executionMetrics(0), accumulatorUpdates.mapValues(_ * 2)) + checkAnswer(statusStore.executionMetrics(executionId), accumulatorUpdates.mapValues(_ * 2)) // Driver accumulator updates that don't belong to this execution should be filtered out, and no // exception will be thrown.
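The rewritten tests invoke the listener's handler methods directly instead of posting events to a ReplayListenerBus, so every store update is applied synchronously before the next assertion runs. A minimal sketch of the pattern, reusing this suite's own helpers (createStatusStore, createTestDataFrame, createProperties, assertJobs); the literal argument values are illustrative only:

    // Feed lifecycle events straight to the SQLAppStatusListener and assert
    // on the backing SQLAppStatusStore; no listener bus is involved.
    val statusStore = createStatusStore()
    val listener = statusStore.listener.get
    val executionId = 0
    val df = createTestDataFrame
    listener.onOtherEvent(SparkListenerSQLExecutionStart(
      executionId, "test", "test", df.queryExecution.toString,
      SparkPlanInfo.fromSparkPlan(df.queryExecution.executedPlan),
      System.currentTimeMillis()))
    listener.onJobStart(SparkListenerJobStart(
      jobId = 0, time = System.currentTimeMillis(), stageInfos = Nil,
      createProperties(executionId)))
    listener.onJobEnd(SparkListenerJobEnd(
      jobId = 0, time = System.currentTimeMillis(), JobSucceeded))
    listener.onOtherEvent(SparkListenerSQLExecutionEnd(
      executionId, System.currentTimeMillis()))
    assertJobs(statusStore.execution(executionId), completed = Seq(0))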
- bus.postToAll(SparkListenerDriverAccumUpdates(0, Seq((999L, 2L)))) + listener.onOtherEvent(SparkListenerDriverAccumUpdates(0, Seq((999L, 2L)))) - checkAnswer(store.executionMetrics(0), accumulatorUpdates.mapValues(_ * 2)) + checkAnswer(statusStore.executionMetrics(executionId), accumulatorUpdates.mapValues(_ * 2)) - bus.postToAll(SparkListenerExecutorMetricsUpdate("", Seq( + listener.onExecutorMetricsUpdate(SparkListenerExecutorMetricsUpdate("", Seq( // (task id, stage id, stage attempt, accum updates) (0L, 0, 0, createAccumulatorInfos(accumulatorUpdates)), (1L, 0, 0, createAccumulatorInfos(accumulatorUpdates.mapValues(_ * 2))) ))) - checkAnswer(store.executionMetrics(0), accumulatorUpdates.mapValues(_ * 3)) + checkAnswer(statusStore.executionMetrics(executionId), accumulatorUpdates.mapValues(_ * 3)) // Retrying a stage should reset the metrics - bus.postToAll(SparkListenerStageSubmitted(createStageInfo(0, 1))) + listener.onStageSubmitted(SparkListenerStageSubmitted(createStageInfo(0, 1))) - bus.postToAll(SparkListenerExecutorMetricsUpdate("", Seq( + listener.onExecutorMetricsUpdate(SparkListenerExecutorMetricsUpdate("", Seq( // (task id, stage id, stage attempt, accum updates) (0L, 0, 1, createAccumulatorInfos(accumulatorUpdates)), (1L, 0, 1, createAccumulatorInfos(accumulatorUpdates)) ))) - checkAnswer(store.executionMetrics(0), accumulatorUpdates.mapValues(_ * 2)) + checkAnswer(statusStore.executionMetrics(executionId), accumulatorUpdates.mapValues(_ * 2)) // Ignore the task end for the first attempt - bus.postToAll(SparkListenerTaskEnd( + listener.onTaskEnd(SparkListenerTaskEnd( stageId = 0, stageAttemptId = 0, taskType = "", @@ -204,17 +221,17 @@ class SQLListenerSuite extends SparkFunSuite with SharedSQLContext with JsonTest createTaskInfo(0, 0, accums = accumulatorUpdates.mapValues(_ * 100)), null)) - checkAnswer(store.executionMetrics(0), accumulatorUpdates.mapValues(_ * 2)) + checkAnswer(statusStore.executionMetrics(executionId), accumulatorUpdates.mapValues(_ * 2)) // Finish two tasks - bus.postToAll(SparkListenerTaskEnd( + listener.onTaskEnd(SparkListenerTaskEnd( stageId = 0, stageAttemptId = 1, taskType = "", reason = null, createTaskInfo(0, 0, accums = accumulatorUpdates.mapValues(_ * 2)), null)) - bus.postToAll(SparkListenerTaskEnd( + listener.onTaskEnd(SparkListenerTaskEnd( stageId = 0, stageAttemptId = 1, taskType = "", @@ -222,28 +239,28 @@ class SQLListenerSuite extends SparkFunSuite with SharedSQLContext with JsonTest createTaskInfo(1, 0, accums = accumulatorUpdates.mapValues(_ * 3)), null)) - checkAnswer(store.executionMetrics(0), accumulatorUpdates.mapValues(_ * 5)) + checkAnswer(statusStore.executionMetrics(executionId), accumulatorUpdates.mapValues(_ * 5)) // Submit a new stage - bus.postToAll(SparkListenerStageSubmitted(createStageInfo(1, 0))) + listener.onStageSubmitted(SparkListenerStageSubmitted(createStageInfo(1, 0))) - bus.postToAll(SparkListenerExecutorMetricsUpdate("", Seq( + listener.onExecutorMetricsUpdate(SparkListenerExecutorMetricsUpdate("", Seq( // (task id, stage id, stage attempt, accum updates) (0L, 1, 0, createAccumulatorInfos(accumulatorUpdates)), (1L, 1, 0, createAccumulatorInfos(accumulatorUpdates)) ))) - checkAnswer(store.executionMetrics(0), accumulatorUpdates.mapValues(_ * 7)) + checkAnswer(statusStore.executionMetrics(executionId), accumulatorUpdates.mapValues(_ * 7)) // Finish two tasks - bus.postToAll(SparkListenerTaskEnd( + listener.onTaskEnd(SparkListenerTaskEnd( stageId = 1, stageAttemptId = 0, taskType = "", reason = null,
createTaskInfo(0, 0, accums = accumulatorUpdates.mapValues(_ * 3)), null)) - bus.postToAll(SparkListenerTaskEnd( + listener.onTaskEnd(SparkListenerTaskEnd( stageId = 1, stageAttemptId = 0, taskType = "", @@ -251,127 +268,180 @@ class SQLListenerSuite extends SparkFunSuite with SharedSQLContext with JsonTest createTaskInfo(1, 0, accums = accumulatorUpdates.mapValues(_ * 3)), null)) - checkAnswer(store.executionMetrics(0), accumulatorUpdates.mapValues(_ * 11)) + checkAnswer(statusStore.executionMetrics(executionId), accumulatorUpdates.mapValues(_ * 11)) - assertJobs(store.execution(0), running = Seq(0)) + assertJobs(statusStore.execution(executionId), running = Seq(0)) - bus.postToAll(SparkListenerJobEnd( + listener.onJobEnd(SparkListenerJobEnd( jobId = 0, time = System.currentTimeMillis(), JobSucceeded )) - bus.postToAll(SparkListenerSQLExecutionEnd( + listener.onOtherEvent(SparkListenerSQLExecutionEnd( executionId, System.currentTimeMillis())) - assertJobs(store.execution(0), completed = Seq(0)) - checkAnswer(store.executionMetrics(0), accumulatorUpdates.mapValues(_ * 11)) + assertJobs(statusStore.execution(executionId), completed = Seq(0)) + + checkAnswer(statusStore.executionMetrics(executionId), accumulatorUpdates.mapValues(_ * 11)) } - sqlStoreTest("onExecutionEnd happens before onJobEnd(JobSucceeded)") { (store, bus) => + test("onExecutionEnd happens before onJobEnd(JobSucceeded)") { + val statusStore = createStatusStore() + val listener = statusStore.listener.get + val executionId = 0 val df = createTestDataFrame - bus.postToAll(SparkListenerSQLExecutionStart( + listener.onOtherEvent(SparkListenerSQLExecutionStart( executionId, "test", "test", df.queryExecution.toString, SparkPlanInfo.fromSparkPlan(df.queryExecution.executedPlan), System.currentTimeMillis())) - bus.postToAll(SparkListenerJobStart( + listener.onJobStart(SparkListenerJobStart( jobId = 0, time = System.currentTimeMillis(), stageInfos = Nil, createProperties(executionId))) - bus.postToAll(SparkListenerSQLExecutionEnd( + listener.onOtherEvent(SparkListenerSQLExecutionEnd( executionId, System.currentTimeMillis())) - bus.postToAll(SparkListenerJobEnd( + listener.onJobEnd(SparkListenerJobEnd( jobId = 0, time = System.currentTimeMillis(), JobSucceeded )) - assertJobs(store.execution(0), completed = Seq(0)) + assertJobs(statusStore.execution(executionId), completed = Seq(0)) } - sqlStoreTest("onExecutionEnd happens before multiple onJobEnd(JobSucceeded)s") { (store, bus) => + test("onExecutionEnd happens before multiple onJobEnd(JobSucceeded)s") { + val statusStore = createStatusStore() + val listener = statusStore.listener.get + val executionId = 0 val df = createTestDataFrame - bus.postToAll(SparkListenerSQLExecutionStart( + listener.onOtherEvent(SparkListenerSQLExecutionStart( executionId, "test", "test", df.queryExecution.toString, SparkPlanInfo.fromSparkPlan(df.queryExecution.executedPlan), System.currentTimeMillis())) - bus.postToAll(SparkListenerJobStart( + listener.onJobStart(SparkListenerJobStart( jobId = 0, time = System.currentTimeMillis(), stageInfos = Nil, createProperties(executionId))) - bus.postToAll(SparkListenerJobEnd( + listener.onJobEnd(SparkListenerJobEnd( jobId = 0, time = System.currentTimeMillis(), JobSucceeded )) - bus.postToAll(SparkListenerJobStart( + listener.onJobStart(SparkListenerJobStart( jobId = 1, time = System.currentTimeMillis(), stageInfos = Nil, createProperties(executionId))) - bus.postToAll(SparkListenerSQLExecutionEnd( + listener.onOtherEvent(SparkListenerSQLExecutionEnd( executionId, 
System.currentTimeMillis())) - bus.postToAll(SparkListenerJobEnd( + listener.onJobEnd(SparkListenerJobEnd( jobId = 1, time = System.currentTimeMillis(), JobSucceeded )) - assertJobs(store.execution(0), completed = Seq(0, 1)) + assertJobs(statusStore.execution(executionId), completed = Seq(0, 1)) } - sqlStoreTest("onExecutionEnd happens before onJobEnd(JobFailed)") { (store, bus) => + test("onExecutionEnd happens before onJobEnd(JobFailed)") { + val statusStore = createStatusStore() + val listener = statusStore.listener.get + val executionId = 0 val df = createTestDataFrame - bus.postToAll(SparkListenerSQLExecutionStart( + listener.onOtherEvent(SparkListenerSQLExecutionStart( executionId, "test", "test", df.queryExecution.toString, SparkPlanInfo.fromSparkPlan(df.queryExecution.executedPlan), System.currentTimeMillis())) - bus.postToAll(SparkListenerJobStart( + listener.onJobStart(SparkListenerJobStart( jobId = 0, time = System.currentTimeMillis(), stageInfos = Seq.empty, createProperties(executionId))) - bus.postToAll(SparkListenerSQLExecutionEnd( + listener.onOtherEvent(SparkListenerSQLExecutionEnd( executionId, System.currentTimeMillis())) - bus.postToAll(SparkListenerJobEnd( + listener.onJobEnd(SparkListenerJobEnd( jobId = 0, time = System.currentTimeMillis(), JobFailed(new RuntimeException("Oops")) )) - assertJobs(store.execution(0), failed = Seq(0)) + assertJobs(statusStore.execution(executionId), failed = Seq(0)) + } + + test("handle one execution with multiple jobs") { + val statusStore = createStatusStore() + val listener = statusStore.listener.get + + val executionId = 0 + val df = createTestDataFrame + listener.onOtherEvent(SparkListenerSQLExecutionStart( + executionId, + "test", + "test", + df.queryExecution.toString, + SparkPlanInfo.fromSparkPlan(df.queryExecution.executedPlan), + System.currentTimeMillis())) + + var stageId = 0 + def twoStageJob(jobId: Int): Unit = { + val stages = Seq(stageId, stageId + 1).map { id => createStageInfo(id, 0)} + stageId += 2 + listener.onJobStart(SparkListenerJobStart( + jobId = jobId, + time = System.currentTimeMillis(), + stageInfos = stages, + createProperties(executionId))) + stages.foreach { s => + listener.onStageSubmitted(SparkListenerStageSubmitted(s)) + listener.onStageCompleted(SparkListenerStageCompleted(s)) + } + listener.onJobEnd(SparkListenerJobEnd( + jobId = jobId, + time = System.currentTimeMillis(), + JobSucceeded + )) + } + // submit two jobs with the same executionId + twoStageJob(0) + twoStageJob(1) + listener.onOtherEvent(SparkListenerSQLExecutionEnd( + executionId, System.currentTimeMillis())) + + assertJobs(statusStore.execution(0), completed = 0 to 1) + assert(statusStore.execution(0).get.stages === (0 to 3).toSet) } test("SPARK-11126: no memory leak when running non SQL jobs") { - val previousStageNumber = statusStore.executionsList().size + val listener = spark.sharedState.statusStore.listener.get + // At the beginning of this test case, there should be no live data in the listener. 
+ assert(listener.noLiveData()) spark.sparkContext.parallelize(1 to 10).foreach(i => ()) spark.sparkContext.listenerBus.waitUntilEmpty(10000) - // listener should ignore the non SQL stage - assert(statusStore.executionsList().size == previousStageNumber) - - spark.sparkContext.parallelize(1 to 10).toDF().foreach(i => ()) - spark.sparkContext.listenerBus.waitUntilEmpty(10000) - // listener should save the SQL stage - assert(statusStore.executionsList().size == previousStageNumber + 1) + // Listener should ignore the non-SQL stages, as the stage data are only removed when SQL + // execution ends, which will not be triggered for non-SQL jobs. + assert(listener.noLiveData()) } test("driver side SQL metrics") { + val statusStore = spark.sharedState.statusStore val oldCount = statusStore.executionsList().size - val expectedAccumValue = 12345L + + val expectedAccumValue = 12345 val physicalPlan = MyPlan(sqlContext.sparkContext, expectedAccumValue) val dummyQueryExecution = new QueryExecution(spark, LocalRelation()) { override lazy val sparkPlan = physicalPlan @@ -382,7 +452,8 @@ class SQLListenerSuite extends SparkFunSuite with SharedSQLContext with JsonTest physicalPlan.execute().collect() } - while (statusStore.executionsList().size < oldCount) { + // Wait until the new execution is started and being tracked. + while (statusStore.executionsCount() < oldCount) { Thread.sleep(100) } @@ -400,30 +471,6 @@ class SQLListenerSuite extends SparkFunSuite with SharedSQLContext with JsonTest assert(metrics(driverMetric.id) === expectedValue) } - private def assertJobs( - exec: Option[SQLExecutionUIData], - running: Seq[Int] = Nil, - completed: Seq[Int] = Nil, - failed: Seq[Int] = Nil): Unit = { - - val actualRunning = new ListBuffer[Int]() - val actualCompleted = new ListBuffer[Int]() - val actualFailed = new ListBuffer[Int]() - - exec.get.jobs.foreach { case (jobId, jobStatus) => - jobStatus match { - case JobExecutionStatus.RUNNING => actualRunning += jobId - case JobExecutionStatus.SUCCEEDED => actualCompleted += jobId - case JobExecutionStatus.FAILED => actualFailed += jobId - case _ => fail(s"Unexpected status $jobStatus") - } - } - - assert(actualRunning.toSeq.sorted === running) - assert(actualCompleted.toSeq.sorted === completed) - assert(actualFailed.toSeq.sorted === failed) - } - test("roundtripping SparkListenerDriverAccumUpdates through JsonProtocol (SPARK-18462)") { val event = SparkListenerDriverAccumUpdates(1L, Seq((2L, 3L))) val json = JsonProtocol.sparkEventToJson(event) @@ -489,17 +536,17 @@ private case class MyPlan(sc: SparkContext, expectedValue: Long) extends LeafExe } -class SQLListenerMemoryLeakSuite extends SparkFunSuite { +class SQLAppStatusListenerMemoryLeakSuite extends SparkFunSuite { - // TODO: this feature is not yet available in SQLAppStatusStore. 
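The test below can switch from ignore back to test because store writes can now be made synchronous: with ASYNC_TRACKING_ENABLED (imported via org.apache.spark.status.config._) set to false, the ElementTrackingStore applies each listener event inline, so assertions such as executionsCount() and noLiveData() observe a settled state. A hedged configuration sketch; the exact flag semantics are an assumption based on its use in these tests:

    // Assumed behavior: disabling async tracking makes ElementTrackingStore
    // apply writes inline rather than on a background thread, so post-run
    // assertions against the status store are deterministic.
    val conf = new SparkConf()
      .setMaster("local")
      .setAppName("test")
      .set(ASYNC_TRACKING_ENABLED, false)
    withSpark(new SparkContext(conf)) { sc =>
      val spark = new SparkSession(sc)
      // run SQL workloads here, then assert on spark.sharedState.statusStore
    }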
- ignore("no memory leak") { - quietly { - val conf = new SparkConf() - .setMaster("local") - .setAppName("test") - .set(config.MAX_TASK_FAILURES, 1) // Don't retry the tasks to run this test quickly - .set("spark.sql.ui.retainedExecutions", "50") // Set it to 50 to run this test quickly - withSpark(new SparkContext(conf)) { sc => + test("no memory leak") { + val conf = new SparkConf() + .setMaster("local") + .setAppName("test") + .set(config.MAX_TASK_FAILURES, 1) // Don't retry the tasks to run this test quickly + .set("spark.sql.ui.retainedExecutions", "50") // Set it to 50 to run this test quickly + .set(ASYNC_TRACKING_ENABLED, false) + withSpark(new SparkContext(conf)) { sc => + quietly { val spark = new SparkSession(sc) import spark.implicits._ // Run 100 successful executions and 100 failed executions. @@ -517,9 +564,10 @@ class SQLListenerMemoryLeakSuite extends SparkFunSuite { } } sc.listenerBus.waitUntilEmpty(10000) - - val statusStore = new SQLAppStatusStore(sc.statusStore.store) - assert(statusStore.executionsList().size <= 50) + val statusStore = spark.sharedState.statusStore + assert(statusStore.executionsCount() <= 50) + // No live data should be left behind after all executions end. + assert(statusStore.listener.get.noLiveData()) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala index 068a17bf772e1..7304803a092c0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala @@ -23,6 +23,7 @@ import org.apache.arrow.vector.complex._ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.execution.arrow.ArrowUtils import org.apache.spark.sql.types._ +import org.apache.spark.sql.vectorized.ArrowColumnVector import org.apache.spark.unsafe.types.UTF8String class ArrowColumnVectorSuite extends SparkFunSuite { @@ -30,19 +31,17 @@ class ArrowColumnVectorSuite extends SparkFunSuite { test("boolean") { val allocator = ArrowUtils.rootAllocator.newChildAllocator("boolean", 0, Long.MaxValue) val vector = ArrowUtils.toArrowField("boolean", BooleanType, nullable = true, null) - .createVector(allocator).asInstanceOf[NullableBitVector] + .createVector(allocator).asInstanceOf[BitVector] vector.allocateNew() - val mutator = vector.getMutator() (0 until 10).foreach { i => - mutator.setSafe(i, if (i % 2 == 0) 1 else 0) + vector.setSafe(i, if (i % 2 == 0) 1 else 0) } - mutator.setNull(10) - mutator.setValueCount(11) + vector.setNull(10) + vector.setValueCount(11) val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === BooleanType) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) (0 until 10).foreach { i => @@ -59,19 +58,17 @@ class ArrowColumnVectorSuite extends SparkFunSuite { test("byte") { val allocator = ArrowUtils.rootAllocator.newChildAllocator("byte", 0, Long.MaxValue) val vector = ArrowUtils.toArrowField("byte", ByteType, nullable = true, null) - .createVector(allocator).asInstanceOf[NullableTinyIntVector] + .createVector(allocator).asInstanceOf[TinyIntVector] vector.allocateNew() - val mutator = vector.getMutator() (0 until 10).foreach { i => - mutator.setSafe(i, i.toByte) + vector.setSafe(i, i.toByte) } - mutator.setNull(10) - mutator.setValueCount(11) + vector.setNull(10) + vector.setValueCount(11) val columnVector = new 
ArrowColumnVector(vector) assert(columnVector.dataType === ByteType) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) (0 until 10).foreach { i => @@ -88,19 +85,17 @@ class ArrowColumnVectorSuite extends SparkFunSuite { test("short") { val allocator = ArrowUtils.rootAllocator.newChildAllocator("short", 0, Long.MaxValue) val vector = ArrowUtils.toArrowField("short", ShortType, nullable = true, null) - .createVector(allocator).asInstanceOf[NullableSmallIntVector] + .createVector(allocator).asInstanceOf[SmallIntVector] vector.allocateNew() - val mutator = vector.getMutator() (0 until 10).foreach { i => - mutator.setSafe(i, i.toShort) + vector.setSafe(i, i.toShort) } - mutator.setNull(10) - mutator.setValueCount(11) + vector.setNull(10) + vector.setValueCount(11) val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === ShortType) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) (0 until 10).foreach { i => @@ -117,19 +112,17 @@ class ArrowColumnVectorSuite extends SparkFunSuite { test("int") { val allocator = ArrowUtils.rootAllocator.newChildAllocator("int", 0, Long.MaxValue) val vector = ArrowUtils.toArrowField("int", IntegerType, nullable = true, null) - .createVector(allocator).asInstanceOf[NullableIntVector] + .createVector(allocator).asInstanceOf[IntVector] vector.allocateNew() - val mutator = vector.getMutator() (0 until 10).foreach { i => - mutator.setSafe(i, i) + vector.setSafe(i, i) } - mutator.setNull(10) - mutator.setValueCount(11) + vector.setNull(10) + vector.setValueCount(11) val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === IntegerType) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) (0 until 10).foreach { i => @@ -146,19 +139,17 @@ class ArrowColumnVectorSuite extends SparkFunSuite { test("long") { val allocator = ArrowUtils.rootAllocator.newChildAllocator("long", 0, Long.MaxValue) val vector = ArrowUtils.toArrowField("long", LongType, nullable = true, null) - .createVector(allocator).asInstanceOf[NullableBigIntVector] + .createVector(allocator).asInstanceOf[BigIntVector] vector.allocateNew() - val mutator = vector.getMutator() (0 until 10).foreach { i => - mutator.setSafe(i, i.toLong) + vector.setSafe(i, i.toLong) } - mutator.setNull(10) - mutator.setValueCount(11) + vector.setNull(10) + vector.setValueCount(11) val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === LongType) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) (0 until 10).foreach { i => @@ -175,19 +166,17 @@ class ArrowColumnVectorSuite extends SparkFunSuite { test("float") { val allocator = ArrowUtils.rootAllocator.newChildAllocator("float", 0, Long.MaxValue) val vector = ArrowUtils.toArrowField("float", FloatType, nullable = true, null) - .createVector(allocator).asInstanceOf[NullableFloat4Vector] + .createVector(allocator).asInstanceOf[Float4Vector] vector.allocateNew() - val mutator = vector.getMutator() (0 until 10).foreach { i => - mutator.setSafe(i, i.toFloat) + vector.setSafe(i, i.toFloat) } - mutator.setNull(10) - mutator.setValueCount(11) + vector.setNull(10) + vector.setValueCount(11) val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === FloatType) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) (0 until 10).foreach { i => @@ -204,19 +193,17 @@ class ArrowColumnVectorSuite extends SparkFunSuite { test("double") { val allocator = ArrowUtils.rootAllocator.newChildAllocator("double", 0, 
Long.MaxValue) val vector = ArrowUtils.toArrowField("double", DoubleType, nullable = true, null) - .createVector(allocator).asInstanceOf[NullableFloat8Vector] + .createVector(allocator).asInstanceOf[Float8Vector] vector.allocateNew() - val mutator = vector.getMutator() (0 until 10).foreach { i => - mutator.setSafe(i, i.toDouble) + vector.setSafe(i, i.toDouble) } - mutator.setNull(10) - mutator.setValueCount(11) + vector.setNull(10) + vector.setValueCount(11) val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === DoubleType) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) (0 until 10).foreach { i => @@ -233,20 +220,18 @@ class ArrowColumnVectorSuite extends SparkFunSuite { test("string") { val allocator = ArrowUtils.rootAllocator.newChildAllocator("string", 0, Long.MaxValue) val vector = ArrowUtils.toArrowField("string", StringType, nullable = true, null) - .createVector(allocator).asInstanceOf[NullableVarCharVector] + .createVector(allocator).asInstanceOf[VarCharVector] vector.allocateNew() - val mutator = vector.getMutator() (0 until 10).foreach { i => val utf8 = s"str$i".getBytes("utf8") - mutator.setSafe(i, utf8, 0, utf8.length) + vector.setSafe(i, utf8, 0, utf8.length) } - mutator.setNull(10) - mutator.setValueCount(11) + vector.setNull(10) + vector.setValueCount(11) val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === StringType) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) (0 until 10).foreach { i => @@ -261,20 +246,18 @@ class ArrowColumnVectorSuite extends SparkFunSuite { test("binary") { val allocator = ArrowUtils.rootAllocator.newChildAllocator("binary", 0, Long.MaxValue) val vector = ArrowUtils.toArrowField("binary", BinaryType, nullable = true, null) - .createVector(allocator).asInstanceOf[NullableVarBinaryVector] + .createVector(allocator).asInstanceOf[VarBinaryVector] vector.allocateNew() - val mutator = vector.getMutator() (0 until 10).foreach { i => val utf8 = s"str$i".getBytes("utf8") - mutator.setSafe(i, utf8, 0, utf8.length) + vector.setSafe(i, utf8, 0, utf8.length) } - mutator.setNull(10) - mutator.setValueCount(11) + vector.setNull(10) + vector.setValueCount(11) val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === BinaryType) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) (0 until 10).foreach { i => @@ -291,35 +274,32 @@ class ArrowColumnVectorSuite extends SparkFunSuite { val vector = ArrowUtils.toArrowField("array", ArrayType(IntegerType), nullable = true, null) .createVector(allocator).asInstanceOf[ListVector] vector.allocateNew() - val mutator = vector.getMutator() - val elementVector = vector.getDataVector().asInstanceOf[NullableIntVector] - val elementMutator = elementVector.getMutator() + val elementVector = vector.getDataVector().asInstanceOf[IntVector] // [1, 2] - mutator.startNewValue(0) - elementMutator.setSafe(0, 1) - elementMutator.setSafe(1, 2) - mutator.endValue(0, 2) + vector.startNewValue(0) + elementVector.setSafe(0, 1) + elementVector.setSafe(1, 2) + vector.endValue(0, 2) // [3, null, 5] - mutator.startNewValue(1) - elementMutator.setSafe(2, 3) - elementMutator.setNull(3) - elementMutator.setSafe(4, 5) - mutator.endValue(1, 3) + vector.startNewValue(1) + elementVector.setSafe(2, 3) + elementVector.setNull(3) + elementVector.setSafe(4, 5) + vector.endValue(1, 3) // null // [] - mutator.startNewValue(3) - mutator.endValue(3, 0) + vector.startNewValue(3) + vector.endValue(3, 0) - 
elementMutator.setValueCount(5) - mutator.setValueCount(4) + elementVector.setValueCount(5) + vector.setValueCount(4) val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === ArrayType(IntegerType)) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) val array0 = columnVector.getArray(0) @@ -348,42 +328,38 @@ class ArrowColumnVectorSuite extends SparkFunSuite { val vector = ArrowUtils.toArrowField("struct", schema, nullable = true, null) .createVector(allocator).asInstanceOf[NullableMapVector] vector.allocateNew() - val mutator = vector.getMutator() - val intVector = vector.getChildByOrdinal(0).asInstanceOf[NullableIntVector] - val intMutator = intVector.getMutator() - val longVector = vector.getChildByOrdinal(1).asInstanceOf[NullableBigIntVector] - val longMutator = longVector.getMutator() + val intVector = vector.getChildByOrdinal(0).asInstanceOf[IntVector] + val longVector = vector.getChildByOrdinal(1).asInstanceOf[BigIntVector] // (1, 1L) - mutator.setIndexDefined(0) - intMutator.setSafe(0, 1) - longMutator.setSafe(0, 1L) + vector.setIndexDefined(0) + intVector.setSafe(0, 1) + longVector.setSafe(0, 1L) // (2, null) - mutator.setIndexDefined(1) - intMutator.setSafe(1, 2) - longMutator.setNull(1) + vector.setIndexDefined(1) + intVector.setSafe(1, 2) + longVector.setNull(1) // (null, 3L) - mutator.setIndexDefined(2) - intMutator.setNull(2) - longMutator.setSafe(2, 3L) + vector.setIndexDefined(2) + intVector.setNull(2) + longVector.setSafe(2, 3L) // null - mutator.setNull(3) + vector.setNull(3) // (5, 5L) - mutator.setIndexDefined(4) - intMutator.setSafe(4, 5) - longMutator.setSafe(4, 5L) + vector.setIndexDefined(4) + intVector.setSafe(4, 5) + longVector.setSafe(4, 5L) - intMutator.setValueCount(5) - longMutator.setValueCount(5) - mutator.setValueCount(5) + intVector.setValueCount(5) + longVector.setValueCount(5) + vector.setValueCount(5) val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === schema) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) val row0 = columnVector.getStruct(0, 2) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala index 3c76ca79f5dda..944240f3bade5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala @@ -21,10 +21,10 @@ import org.scalatest.BeforeAndAfterEach import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.SpecificInternalRow -import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.execution.columnar.ColumnAccessor import org.apache.spark.sql.execution.columnar.compression.ColumnBuilderHelper import org.apache.spark.sql.types._ +import org.apache.spark.sql.vectorized.ColumnarArray import org.apache.spark.unsafe.types.UTF8String class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { @@ -57,7 +57,7 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { testVector.appendBoolean(i % 2 == 0) } - val array = new ColumnarArray(testVector) + val array = new ColumnarArray(testVector, 0, 10) (0 until 10).foreach { i => assert(array.get(i, BooleanType) === (i % 2 == 0)) @@ -69,7 +69,7 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { testVector.appendByte(i.toByte) } - val array 
= new ColumnarArray(testVector) + val array = new ColumnarArray(testVector, 0, 10) (0 until 10).foreach { i => assert(array.get(i, ByteType) === i.toByte) @@ -81,7 +81,7 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { testVector.appendShort(i.toShort) } - val array = new ColumnarArray(testVector) + val array = new ColumnarArray(testVector, 0, 10) (0 until 10).foreach { i => assert(array.get(i, ShortType) === i.toShort) @@ -93,7 +93,7 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { testVector.appendInt(i) } - val array = new ColumnarArray(testVector) + val array = new ColumnarArray(testVector, 0, 10) (0 until 10).foreach { i => assert(array.get(i, IntegerType) === i) @@ -105,7 +105,7 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { testVector.appendLong(i) } - val array = new ColumnarArray(testVector) + val array = new ColumnarArray(testVector, 0, 10) (0 until 10).foreach { i => assert(array.get(i, LongType) === i) @@ -117,7 +117,7 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { testVector.appendFloat(i.toFloat) } - val array = new ColumnarArray(testVector) + val array = new ColumnarArray(testVector, 0, 10) (0 until 10).foreach { i => assert(array.get(i, FloatType) === i.toFloat) @@ -129,7 +129,7 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { testVector.appendDouble(i.toDouble) } - val array = new ColumnarArray(testVector) + val array = new ColumnarArray(testVector, 0, 10) (0 until 10).foreach { i => assert(array.get(i, DoubleType) === i.toDouble) @@ -142,7 +142,7 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { testVector.appendByteArray(utf8, 0, utf8.length) } - val array = new ColumnarArray(testVector) + val array = new ColumnarArray(testVector, 0, 10) (0 until 10).foreach { i => assert(array.get(i, StringType) === UTF8String.fromString(s"str$i")) @@ -155,7 +155,7 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { testVector.appendByteArray(utf8, 0, utf8.length) } - val array = new ColumnarArray(testVector) + val array = new ColumnarArray(testVector, 0, 10) (0 until 10).foreach { i => val utf8 = s"str$i".getBytes("utf8") @@ -163,6 +163,18 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { } } + testVectors("mutable ColumnarRow", 10, IntegerType) { testVector => + val mutableRow = new MutableColumnarRow(Array(testVector)) + (0 until 10).foreach { i => + mutableRow.rowId = i + mutableRow.setInt(0, 10 - i) + } + (0 until 10).foreach { i => + mutableRow.rowId = i + assert(mutableRow.getInt(0) === (10 - i)) + } + } + val arrayType: ArrayType = ArrayType(IntegerType, containsNull = true) testVectors("array", 10, arrayType) { testVector => @@ -179,12 +191,10 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { testVector.putArray(2, 3, 0) testVector.putArray(3, 3, 3) - val array = new ColumnarArray(testVector) - - assert(array.getArray(0).toIntArray() === Array(0)) - assert(array.getArray(1).toIntArray() === Array(1, 2)) - assert(array.getArray(2).toIntArray() === Array.empty[Int]) - assert(array.getArray(3).toIntArray() === Array(3, 4, 5)) + assert(testVector.getArray(0).toIntArray() === Array(0)) + assert(testVector.getArray(1).toIntArray() === Array(1, 2)) + assert(testVector.getArray(2).toIntArray() === Array.empty[Int]) + assert(testVector.getArray(3).toIntArray() === Array(3, 4, 5)) } val structType: StructType = new StructType().add("int", 
IntegerType).add("double", DoubleType) @@ -196,12 +206,10 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { c1.putInt(1, 456) c2.putDouble(1, 5.67) - val array = new ColumnarArray(testVector) - - assert(array.getStruct(0, structType.length).get(0, IntegerType) === 123) - assert(array.getStruct(0, structType.length).get(1, DoubleType) === 3.45) - assert(array.getStruct(1, structType.length).get(0, IntegerType) === 456) - assert(array.getStruct(1, structType.length).get(1, DoubleType) === 5.67) + assert(testVector.getStruct(0, structType.length).get(0, IntegerType) === 123) + assert(testVector.getStruct(0, structType.length).get(1, DoubleType) === 3.45) + assert(testVector.getStruct(1, structType.length).get(0, IntegerType) === 456) + assert(testVector.getStruct(1, structType.length).get(1, DoubleType) === 5.67) } test("[SPARK-22092] off-heap column vector reallocation corrupts array data") { @@ -214,9 +222,8 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { testVector.reserve(16) // Check that none of the values got lost/overwritten. - val array = new ColumnarArray(testVector) (0 until 8).foreach { i => - assert(array.get(i, arrayType).asInstanceOf[ArrayData].toIntArray() === Array(i)) + assert(testVector.getArray(i).toIntArray() === Array(i)) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchBenchmark.scala index 705b26b8c91e6..38ea2e47fdef8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchBenchmark.scala @@ -23,11 +23,9 @@ import scala.util.Random import org.apache.spark.memory.MemoryMode import org.apache.spark.sql.catalyst.expressions.UnsafeRow -import org.apache.spark.sql.execution.vectorized.ColumnVector import org.apache.spark.sql.execution.vectorized.OffHeapColumnVector import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector -import org.apache.spark.sql.execution.vectorized.WritableColumnVector -import org.apache.spark.sql.types.{BinaryType, DataType, IntegerType} +import org.apache.spark.sql.types.{ArrayType, BinaryType, IntegerType} import org.apache.spark.unsafe.Platform import org.apache.spark.util.Benchmark import org.apache.spark.util.collection.BitSet @@ -265,20 +263,22 @@ object ColumnarBatchBenchmark { } /* - Intel(R) Core(TM) i7-4870HQ CPU @ 2.50GHz - Int Read/Write: Avg Time(ms) Avg Rate(M/s) Relative Rate - ------------------------------------------------------------------------- - Java Array 248.8 1317.04 1.00 X - ByteBuffer Unsafe 435.6 752.25 0.57 X - ByteBuffer API 1752.0 187.03 0.14 X - DirectByteBuffer 595.4 550.35 0.42 X - Unsafe Buffer 235.2 1393.20 1.06 X - Column(on heap) 189.8 1726.45 1.31 X - Column(off heap) 408.4 802.35 0.61 X - Column(off heap direct) 237.6 1379.12 1.05 X - UnsafeRow (on heap) 414.6 790.35 0.60 X - UnsafeRow (off heap) 487.2 672.58 0.51 X - Column On Heap Append 530.1 618.14 0.59 X + Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1 + Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz + + Int Read/Write: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + Java Array 177 / 181 1856.4 0.5 1.0X + ByteBuffer Unsafe 318 / 322 1032.0 1.0 0.6X + ByteBuffer API 1411 / 1418 232.2 4.3 0.1X + 
DirectByteBuffer 467 / 474 701.8 1.4 0.4X + Unsafe Buffer 178 / 185 1843.6 0.5 1.0X + Column(on heap) 178 / 184 1840.8 0.5 1.0X + Column(off heap) 341 / 344 961.8 1.0 0.5X + Column(off heap direct) 178 / 184 1845.4 0.5 1.0X + UnsafeRow (on heap) 378 / 389 866.3 1.2 0.5X + UnsafeRow (off heap) 393 / 402 834.0 1.2 0.4X + Column On Heap Append 309 / 318 1059.1 0.9 0.6X */ val benchmark = new Benchmark("Int Read/Write", count * iters) benchmark.addCase("Java Array")(javaArray) @@ -332,11 +332,13 @@ object ColumnarBatchBenchmark { } }} /* - Intel(R) Core(TM) i7-4870HQ CPU @ 2.50GHz - Boolean Read/Write: Avg Time(ms) Avg Rate(M/s) Relative Rate - ------------------------------------------------------------------------- - Bitset 895.88 374.54 1.00 X - Byte Array 578.96 579.56 1.55 X + Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1 + Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz + + Boolean Read/Write: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + Bitset 726 / 727 462.4 2.2 1.0X + Byte Array 530 / 542 632.7 1.6 1.4X */ benchmark.run() } @@ -387,10 +389,13 @@ object ColumnarBatchBenchmark { } /* - String Read/Write: Avg Time(ms) Avg Rate(M/s) Relative Rate - ------------------------------------------------------------------------------------- - On Heap 457.0 35.85 1.00 X - Off Heap 1206.0 13.59 0.38 X + Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1 + Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz + + String Read/Write: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + On Heap 332 / 338 49.3 20.3 1.0X + Off Heap 466 / 467 35.2 28.4 0.7X */ val benchmark = new Benchmark("String Read/Write", count * iters) benchmark.addCase("On Heap")(column(MemoryMode.ON_HEAP)) @@ -398,9 +403,94 @@ object ColumnarBatchBenchmark { benchmark.run } + def arrayAccess(iters: Int): Unit = { + val random = new Random(0) + val count = 4 * 1000 + + val onHeapVector = new OnHeapColumnVector(count, ArrayType(IntegerType)) + val offHeapVector = new OffHeapColumnVector(count, ArrayType(IntegerType)) + + val minSize = 3 + val maxSize = 32 + var arraysCount = 0 + var elementsCount = 0 + while (arraysCount < count) { + val size = random.nextInt(maxSize - minSize) + minSize + val onHeapArrayData = onHeapVector.arrayData() + val offHeapArrayData = offHeapVector.arrayData() + + var i = 0 + while (i < size) { + val value = random.nextInt() + onHeapArrayData.appendInt(value) + offHeapArrayData.appendInt(value) + i += 1 + } + + onHeapVector.putArray(arraysCount, elementsCount, size) + offHeapVector.putArray(arraysCount, elementsCount, size) + elementsCount += size + arraysCount += 1 + } + + def readArrays(onHeap: Boolean): Unit = { + System.gc() + val vector = if (onHeap) onHeapVector else offHeapVector + + var sum = 0L + for (_ <- 0 until iters) { + var i = 0 + while (i < count) { + sum += vector.getArray(i).numElements() + i += 1 + } + } + } + + def readArrayElements(onHeap: Boolean): Unit = { + System.gc() + val vector = if (onHeap) onHeapVector else offHeapVector + + var sum = 0L + for (_ <- 0 until iters) { + var i = 0 + while (i < count) { + val array = vector.getArray(i) + val size = array.numElements() + var j = 0 + while (j < size) { + sum += array.getInt(j) + j += 1 + } + i += 1 + } + } + } + + val benchmark = new Benchmark("Array Vector Read", count * iters) + benchmark.addCase("On Heap Read Size 
Only") { _ => readArrays(true) } + benchmark.addCase("Off Heap Read Size Only") { _ => readArrays(false) } + benchmark.addCase("On Heap Read Elements") { _ => readArrayElements(true) } + benchmark.addCase("Off Heap Read Elements") { _ => readArrayElements(false) } + + /* + Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1 + Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz + + Array Vector Read: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + On Heap Read Size Only 415 / 422 394.7 2.5 1.0X + Off Heap Read Size Only 394 / 402 415.9 2.4 1.1X + On Heap Read Elements 2558 / 2593 64.0 15.6 0.2X + Off Heap Read Elements 3316 / 3317 49.4 20.2 0.1X + */ + benchmark.run + } + def main(args: Array[String]): Unit = { intAccess(1024 * 40) booleanAccess(1024 * 40) stringAccess(1024 * 4) + arrayAccess(1024 * 40) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala index 80a50866aa504..675f06b31b970 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala @@ -25,7 +25,7 @@ import scala.collection.JavaConverters._ import scala.collection.mutable import scala.util.Random -import org.apache.arrow.vector.NullableIntVector +import org.apache.arrow.vector.IntVector import org.apache.spark.SparkFunSuite import org.apache.spark.memory.MemoryMode @@ -33,6 +33,7 @@ import org.apache.spark.sql.{RandomDataGenerator, Row} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.arrow.ArrowUtils import org.apache.spark.sql.types._ +import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector} import org.apache.spark.unsafe.Platform import org.apache.spark.unsafe.types.CalendarInterval @@ -65,27 +66,22 @@ class ColumnarBatchSuite extends SparkFunSuite { column => val reference = mutable.ArrayBuffer.empty[Boolean] var idx = 0 - assert(!column.anyNullsSet()) assert(column.numNulls() == 0) column.appendNotNull() reference += false - assert(!column.anyNullsSet()) assert(column.numNulls() == 0) column.appendNotNulls(3) (1 to 3).foreach(_ => reference += false) - assert(!column.anyNullsSet()) assert(column.numNulls() == 0) column.appendNull() reference += true - assert(column.anyNullsSet()) assert(column.numNulls() == 1) column.appendNulls(3) (1 to 3).foreach(_ => reference += true) - assert(column.anyNullsSet()) assert(column.numNulls() == 4) idx = column.elementsAppended @@ -93,13 +89,11 @@ class ColumnarBatchSuite extends SparkFunSuite { column.putNotNull(idx) reference += false idx += 1 - assert(column.anyNullsSet()) assert(column.numNulls() == 4) column.putNull(idx) reference += true idx += 1 - assert(column.anyNullsSet()) assert(column.numNulls() == 5) column.putNulls(idx, 3) @@ -107,7 +101,6 @@ class ColumnarBatchSuite extends SparkFunSuite { reference += true reference += true idx += 3 - assert(column.anyNullsSet()) assert(column.numNulls() == 8) column.putNotNulls(idx, 4) @@ -116,7 +109,6 @@ class ColumnarBatchSuite extends SparkFunSuite { reference += false reference += false idx += 4 - assert(column.anyNullsSet()) assert(column.numNulls() == 8) reference.zipWithIndex.foreach { v => @@ -645,26 +637,26 @@ class ColumnarBatchSuite extends SparkFunSuite { 
column.putArray(2, 2, 0) column.putArray(3, 3, 3) - val a1 = ColumnVectorUtils.toPrimitiveJavaArray(column.getArray(0)).asInstanceOf[Array[Int]] - val a2 = ColumnVectorUtils.toPrimitiveJavaArray(column.getArray(1)).asInstanceOf[Array[Int]] - val a3 = ColumnVectorUtils.toPrimitiveJavaArray(column.getArray(2)).asInstanceOf[Array[Int]] - val a4 = ColumnVectorUtils.toPrimitiveJavaArray(column.getArray(3)).asInstanceOf[Array[Int]] + val a1 = ColumnVectorUtils.toJavaIntArray(column.getArray(0)) + val a2 = ColumnVectorUtils.toJavaIntArray(column.getArray(1)) + val a3 = ColumnVectorUtils.toJavaIntArray(column.getArray(2)) + val a4 = ColumnVectorUtils.toJavaIntArray(column.getArray(3)) assert(a1 === Array(0)) assert(a2 === Array(1, 2)) assert(a3 === Array.empty[Int]) assert(a4 === Array(3, 4, 5)) // Verify the ArrayData APIs - assert(column.getArray(0).length == 1) + assert(column.getArray(0).numElements() == 1) assert(column.getArray(0).getInt(0) == 0) - assert(column.getArray(1).length == 2) + assert(column.getArray(1).numElements() == 2) assert(column.getArray(1).getInt(0) == 1) assert(column.getArray(1).getInt(1) == 2) - assert(column.getArray(2).length == 0) + assert(column.getArray(2).numElements() == 0) - assert(column.getArray(3).length == 3) + assert(column.getArray(3).numElements() == 3) assert(column.getArray(3).getInt(0) == 3) assert(column.getArray(3).getInt(1) == 4) assert(column.getArray(3).getInt(2) == 5) @@ -677,7 +669,7 @@ class ColumnarBatchSuite extends SparkFunSuite { assert(data.capacity == array.length * 2) data.putInts(0, array.length, array, 0) column.putArray(0, 0, array.length) - assert(ColumnVectorUtils.toPrimitiveJavaArray(column.getArray(0)).asInstanceOf[Array[Int]] + assert(ColumnVectorUtils.toJavaIntArray(column.getArray(0)) === array) } @@ -751,11 +743,6 @@ class ColumnarBatchSuite extends SparkFunSuite { c2.putDouble(1, 5.67) val s = column.getStruct(0) - assert(s.columns()(0).getInt(0) == 123) - assert(s.columns()(0).getInt(1) == 456) - assert(s.columns()(1).getDouble(0) == 3.45) - assert(s.columns()(1).getDouble(1) == 5.67) - assert(s.getInt(0) == 123) assert(s.getDouble(1) == 3.45) @@ -932,10 +919,7 @@ class ColumnarBatchSuite extends SparkFunSuite { assert(it.hasNext == false) // Reset and add 3 rows - batch.reset() - assert(batch.numRows() == 0) - assert(batch.rowIterator().hasNext == false) - + columns.foreach(_.reset()) // Add rows [NULL, 2.2, 2, "abc"], [3, NULL, 3, ""], [4, 4.4, 4, "world"] columns(0).putNull(0) columns(1).putDouble(0, 2.2) @@ -1129,29 +1113,6 @@ class ColumnarBatchSuite extends SparkFunSuite { testRandomRows(false, 30) } - test("mutable ColumnarBatch rows") { - val NUM_ITERS = 10 - val types = Array( - BooleanType, FloatType, DoubleType, IntegerType, LongType, ShortType, - DecimalType.ShortDecimal, DecimalType.IntDecimal, DecimalType.ByteDecimal, - DecimalType.FloatDecimal, DecimalType.LongDecimal, new DecimalType(5, 2), - new DecimalType(12, 2), new DecimalType(30, 10)) - for (i <- 0 to NUM_ITERS) { - val random = new Random(System.nanoTime()) - val schema = RandomDataGenerator.randomSchema(random, numFields = 20, types) - val oldRow = RandomDataGenerator.randomRow(random, schema) - val newRow = RandomDataGenerator.randomRow(random, schema) - - (MemoryMode.ON_HEAP :: MemoryMode.OFF_HEAP :: Nil).foreach { memMode => - val batch = ColumnVectorUtils.toBatch(schema, memMode, (oldRow :: Nil).iterator.asJava) - val columnarBatchRow = batch.getRow(0) - newRow.toSeq.zipWithIndex.foreach(i => columnarBatchRow.update(i._2, i._1)) -
compareStruct(schema, columnarBatchRow, newRow, 0) - batch.close() - } - } - } - test("exceeding maximum capacity should throw an error") { (MemoryMode.ON_HEAP :: MemoryMode.OFF_HEAP :: Nil).foreach { memMode => val column = allocate(1, ByteType, memMode) @@ -1174,22 +1135,20 @@ class ColumnarBatchSuite extends SparkFunSuite { test("create columnar batch from Arrow column vectors") { val allocator = ArrowUtils.rootAllocator.newChildAllocator("int", 0, Long.MaxValue) val vector1 = ArrowUtils.toArrowField("int1", IntegerType, nullable = true, null) - .createVector(allocator).asInstanceOf[NullableIntVector] + .createVector(allocator).asInstanceOf[IntVector] vector1.allocateNew() - val mutator1 = vector1.getMutator() val vector2 = ArrowUtils.toArrowField("int2", IntegerType, nullable = true, null) - .createVector(allocator).asInstanceOf[NullableIntVector] + .createVector(allocator).asInstanceOf[IntVector] vector2.allocateNew() - val mutator2 = vector2.getMutator() (0 until 10).foreach { i => - mutator1.setSafe(i, i) - mutator2.setSafe(i + 1, i) + vector1.setSafe(i, i) + vector2.setSafe(i + 1, i) } - mutator1.setNull(10) - mutator1.setValueCount(11) - mutator2.setNull(0) - mutator2.setValueCount(11) + vector1.setNull(10) + vector1.setValueCount(11) + vector2.setNull(0) + vector2.setValueCount(11) val columnVectors = Seq(new ArrowColumnVector(vector1), new ArrowColumnVector(vector2)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala index f2f951703d08f..03038a6407e2b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala @@ -292,4 +292,34 @@ class SQLConfSuite extends QueryTest with SharedSQLContext { spark.sessionState.conf.clear() } + + test("SPARK-22779: correctly compute default value for fallback configs") { + val fallback = SQLConf.buildConf("spark.sql.__test__.spark_22779") + .fallbackConf(SQLConf.PARQUET_COMPRESSION) + + assert(spark.sessionState.conf.getConfString(fallback.key) === + SQLConf.PARQUET_COMPRESSION.defaultValue.get) + assert(spark.sessionState.conf.getConfString(fallback.key, "lzo") === "lzo") + + val displayValue = spark.sessionState.conf.getAllDefinedConfs + .find { case (key, _, _) => key == fallback.key } + .map { case (_, v, _) => v } + .get + assert(displayValue === fallback.defaultValueString) + + spark.sessionState.conf.setConf(SQLConf.PARQUET_COMPRESSION, "gzip") + assert(spark.sessionState.conf.getConfString(fallback.key) === "gzip") + + spark.sessionState.conf.setConf(fallback, "lzo") + assert(spark.sessionState.conf.getConfString(fallback.key) === "lzo") + + val newDisplayValue = spark.sessionState.conf.getAllDefinedConfs + .find { case (key, _, _) => key == fallback.key } + .map { case (_, v, _) => v } + .get + assert(newDisplayValue === "lzo") + + SQLConf.unregister(fallback) + } + } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 61571bccdcb51..cb2df0ac54f4c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -854,6 +854,22 @@ class JDBCSuite extends SparkFunSuite assert(derby.getTableExistsQuery(table) == defaultQuery) } + test("truncate table query by jdbc dialect") { + val MySQL = JdbcDialects.get("jdbc:mysql://127.0.0.1/db") + val 
Postgres = JdbcDialects.get("jdbc:postgresql://127.0.0.1/db") + val db2 = JdbcDialects.get("jdbc:db2://127.0.0.1/db") + val h2 = JdbcDialects.get(url) + val derby = JdbcDialects.get("jdbc:derby:db") + val table = "weblogs" + val defaultQuery = s"TRUNCATE TABLE $table" + val postgresQuery = s"TRUNCATE TABLE ONLY $table" + assert(MySQL.getTruncateQuery(table) == defaultQuery) + assert(Postgres.getTruncateQuery(table) == postgresQuery) + assert(db2.getTruncateQuery(table) == defaultQuery) + assert(h2.getTruncateQuery(table) == defaultQuery) + assert(derby.getTruncateQuery(table) == defaultQuery) + } + test("Test DataFrame.where for Date and Timestamp") { // Regression test for bug SPARK-11788 val timestamp = java.sql.Timestamp.valueOf("2001-02-20 11:22:33.543543"); @@ -1064,10 +1080,10 @@ class JDBCSuite extends SparkFunSuite } test("unsupported types") { - var e = intercept[SparkException] { + var e = intercept[SQLException] { spark.read.jdbc(urlWithUserAndPass, "TEST.TIMEZONE", new Properties()).collect() }.getMessage - assert(e.contains("java.lang.UnsupportedOperationException: unimplemented")) + assert(e.contains("Unsupported type TIMESTAMP_WITH_TIMEZONE")) e = intercept[SQLException] { spark.read.jdbc(urlWithUserAndPass, "TEST.ARRAY", new Properties()).collect() }.getMessage diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index ab18905e2ddb2..fb61fa716b946 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -602,6 +602,37 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils { ) } + test("SPARK-22042 ReorderJoinPredicates can break when child's partitioning is not decided") { + withTable("bucketed_table", "table1", "table2") { + df.write.format("parquet").saveAsTable("table1") + df.write.format("parquet").saveAsTable("table2") + df.write.format("parquet").bucketBy(8, "j", "k").saveAsTable("bucketed_table") + + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "0") { + checkAnswer( + sql(""" + |SELECT ab.i, ab.j, ab.k, c.i, c.j, c.k + |FROM ( + | SELECT a.i, a.j, a.k + | FROM bucketed_table a + | JOIN table1 b + | ON a.i = b.i + |) ab + |JOIN table2 c + |ON ab.i = c.i + """.stripMargin), + sql(""" + |SELECT a.i, a.j, a.k, c.i, c.j, c.k + |FROM bucketed_table a + |JOIN table1 b + |ON a.i = b.i + |JOIN table2 c + |ON a.i = c.i + """.stripMargin)) + } + } + } + test("error if there exists any malformed bucket files") { withTable("bucketed_table") { df1.write.format("parquet").bucketBy(8, "i").saveAsTable("bucketed_table") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLSourceLoadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLSourceLoadSuite.scala index 3ce6ae3c52927..f22d843bfabde 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLSourceLoadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLSourceLoadSuite.scala @@ -53,13 +53,6 @@ class DDLSourceLoadSuite extends DataSourceTest with SharedSQLContext { assert(spark.read.format("org.apache.spark.sql.sources.FakeSourceOne") .load().schema == StructType(Seq(StructField("stringType", StringType, nullable = false)))) } - - test("should fail to load ORC without Hive Support") { - val e = intercept[AnalysisException] { - spark.read.format("orc").load() - } - assert(e.message.contains("The ORC data 
source must be used with Hive support enabled")) - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index 8b7e2e5f45946..fef01c860db6e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -21,6 +21,8 @@ import java.io.File import org.apache.spark.SparkException import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.PartitionOverwriteMode import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.util.Utils @@ -442,4 +444,80 @@ class InsertSuite extends DataSourceTest with SharedSQLContext { assert(e.contains("Only Data Sources providing FileFormat are supported")) } } + + test("SPARK-20236: dynamic partition overwrite without catalog table") { + withSQLConf(SQLConf.PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.DYNAMIC.toString) { + withTempPath { path => + Seq((1, 1, 1)).toDF("i", "part1", "part2") + .write.partitionBy("part1", "part2").parquet(path.getAbsolutePath) + checkAnswer(spark.read.parquet(path.getAbsolutePath), Row(1, 1, 1)) + + Seq((2, 1, 1)).toDF("i", "part1", "part2") + .write.partitionBy("part1", "part2").mode("overwrite").parquet(path.getAbsolutePath) + checkAnswer(spark.read.parquet(path.getAbsolutePath), Row(2, 1, 1)) + + Seq((2, 2, 2)).toDF("i", "part1", "part2") + .write.partitionBy("part1", "part2").mode("overwrite").parquet(path.getAbsolutePath) + checkAnswer(spark.read.parquet(path.getAbsolutePath), Row(2, 1, 1) :: Row(2, 2, 2) :: Nil) + } + } + } + + test("SPARK-20236: dynamic partition overwrite") { + withSQLConf(SQLConf.PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.DYNAMIC.toString) { + withTable("t") { + sql( + """ + |create table t(i int, part1 int, part2 int) using parquet + |partitioned by (part1, part2) + """.stripMargin) + + sql("insert into t partition(part1=1, part2=1) select 1") + checkAnswer(spark.table("t"), Row(1, 1, 1)) + + sql("insert overwrite table t partition(part1=1, part2=1) select 2") + checkAnswer(spark.table("t"), Row(2, 1, 1)) + + sql("insert overwrite table t partition(part1=2, part2) select 2, 2") + checkAnswer(spark.table("t"), Row(2, 1, 1) :: Row(2, 2, 2) :: Nil) + + sql("insert overwrite table t partition(part1=1, part2=2) select 3") + checkAnswer(spark.table("t"), Row(2, 1, 1) :: Row(2, 2, 2) :: Row(3, 1, 2) :: Nil) + + sql("insert overwrite table t partition(part1=1, part2) select 4, 1") + checkAnswer(spark.table("t"), Row(4, 1, 1) :: Row(2, 2, 2) :: Row(3, 1, 2) :: Nil) + } + } + } + + test("SPARK-20236: dynamic partition overwrite with custom partition path") { + withSQLConf(SQLConf.PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.DYNAMIC.toString) { + withTable("t") { + sql( + """ + |create table t(i int, part1 int, part2 int) using parquet + |partitioned by (part1, part2) + """.stripMargin) + + val path1 = Utils.createTempDir() + sql(s"alter table t add partition(part1=1, part2=1) location '$path1'") + sql(s"insert into t partition(part1=1, part2=1) select 1") + checkAnswer(spark.table("t"), Row(1, 1, 1)) + + sql("insert overwrite table t partition(part1=1, part2=1) select 2") + checkAnswer(spark.table("t"), Row(2, 1, 1)) + + sql("insert overwrite table t partition(part1=2, part2) select 2, 2") + checkAnswer(spark.table("t"), Row(2, 1, 1) :: Row(2, 2, 2) :: Nil) + + val path2 =
Utils.createTempDir() + sql(s"alter table t add partition(part1=1, part2=2) location '$path2'") + sql("insert overwrite table t partition(part1=1, part2=2) select 3") + checkAnswer(spark.table("t"), Row(2, 1, 1) :: Row(2, 2, 2) :: Row(3, 1, 2) :: Nil) + + sql("insert overwrite table t partition(part1=1, part2) select 4, 1") + checkAnswer(spark.table("t"), Row(4, 1, 1) :: Row(2, 2, 2) :: Row(3, 1, 2) :: Nil) + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/fakeExternalSources.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/fakeExternalSources.scala index bf43de597a7a0..2cb48281b30af 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/fakeExternalSources.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/fakeExternalSources.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.sources.{BaseRelation, DataSourceRegister, RelationP import org.apache.spark.sql.types._ -// Note that the package name is intendedly mismatched in order to resemble external data sources +// Note that the package name is intentionally mismatched in order to resemble external data sources // and test the detection for them. class FakeExternalSourceOne extends RelationProvider with DataSourceRegister { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2OptionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2OptionsSuite.scala index 933f4075bcc8a..90d92864b26fa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2OptionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2OptionsSuite.scala @@ -37,4 +37,46 @@ class DataSourceV2OptionsSuite extends SparkFunSuite { val options = new DataSourceV2Options(Map("foo" -> "bAr").asJava) assert(options.get("foo").get == "bAr") } + + test("getInt") { + val options = new DataSourceV2Options(Map("numFOo" -> "1", "foo" -> "bar").asJava) + assert(options.getInt("numFOO", 10) == 1) + assert(options.getInt("numFOO2", 10) == 10) + + intercept[NumberFormatException]{ + options.getInt("foo", 1) + } + } + + test("getBoolean") { + val options = new DataSourceV2Options( + Map("isFoo" -> "true", "isFOO2" -> "false", "foo" -> "bar").asJava) + assert(options.getBoolean("isFoo", false)) + assert(!options.getBoolean("isFoo2", true)) + assert(options.getBoolean("isBar", true)) + assert(!options.getBoolean("isBar", false)) + assert(!options.getBoolean("FOO", true)) + } + + test("getLong") { + val options = new DataSourceV2Options(Map("numFoo" -> "9223372036854775807", + "foo" -> "bar").asJava) + assert(options.getLong("numFOO", 0L) == 9223372036854775807L) + assert(options.getLong("numFoo2", -1L) == -1L) + + intercept[NumberFormatException]{ + options.getLong("foo", 0L) + } + } + + test("getDouble") { + val options = new DataSourceV2Options(Map("numFoo" -> "922337.1", + "foo" -> "bar").asJava) + assert(options.getDouble("numFOO", 0d) == 922337.1d) + assert(options.getDouble("numFoo2", -1.02d) == -1.02d) + + intercept[NumberFormatException]{ + options.getDouble("foo", 0.1d) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2UtilsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2UtilsSuite.scala new file mode 100644 index 0000000000000..4911e3225552d --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2UtilsSuite.scala @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or 
more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources.v2 + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils +import org.apache.spark.sql.internal.SQLConf + +class DataSourceV2UtilsSuite extends SparkFunSuite { + + private val keyPrefix = new DataSourceV2WithSessionConfig().keyPrefix + + test("method withSessionConfig() should propagate session configs correctly") { + // Only match configs whose keys start with "spark.datasource.${keyPrefix}". + val conf = new SQLConf + conf.setConfString(s"spark.datasource.$keyPrefix.foo.bar", "false") + conf.setConfString(s"spark.datasource.$keyPrefix.whateverConfigName", "123") + conf.setConfString(s"spark.sql.$keyPrefix.config.name", "false") + conf.setConfString("spark.datasource.another.config.name", "123") + conf.setConfString(s"spark.datasource.$keyPrefix.", "123") + val cs = classOf[DataSourceV2WithSessionConfig].newInstance() + val confs = DataSourceV2Utils.extractSessionConfigs(cs.asInstanceOf[DataSourceV2], conf) + assert(confs.size == 2) + assert(confs.keySet.filter(_.startsWith("spark.datasource")).size == 0) + assert(confs.keySet.filter(_.startsWith("not.exist.prefix")).size == 0) + assert(confs.keySet.contains("foo.bar")) + assert(confs.keySet.contains("whateverConfigName")) + } +} + +class DataSourceV2WithSessionConfig extends SimpleDataSourceV2 with SessionConfigSupport { + + override def keyPrefix: String = "userDefinedDataSource" +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/EventTimeWatermarkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/EventTimeWatermarkSuite.scala index 47bc452bda0d4..d6bef9ce07379 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/EventTimeWatermarkSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/EventTimeWatermarkSuite.scala @@ -260,8 +260,8 @@ class EventTimeWatermarkSuite extends StreamTest with BeforeAndAfter with Matche CheckLastBatch((10, 5)), StopStream, AssertOnQuery { q => // purge commit and clear the sink - val commit = q.batchCommitLog.getLatest().map(_._1).getOrElse(-1L) + 1L - q.batchCommitLog.purge(commit) + val commit = q.commitLog.getLatest().map(_._1).getOrElse(-1L) + 1L + q.commitLog.purge(commit) q.sink.asInstanceOf[MemorySink].clear() true }, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala index 08db06b94904b..8c4e1fd00b0a2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala @@ -81,7 +81,7 @@ class FileStreamSinkSuite extends StreamTest { .start(outputDir) try { - // The output is
partitoned by "value", so the value will appear in the file path. + // The output is partitioned by "value", so the value will appear in the file path. // This is to test if we handle spaces in the path correctly. inputData.addData("hello world") failAfter(streamingTimeout) { @@ -305,6 +305,10 @@ class FileStreamSinkSuite extends StreamTest { testFormat(Some("parquet")) } + test("orc") { + testFormat(Some("orc")) + } + test("text") { testFormat(Some("text")) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala index b6baaed1927e4..39bb572740617 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala @@ -86,6 +86,28 @@ abstract class FileStreamSourceTest } } + case class AddOrcFileData(data: DataFrame, src: File, tmp: File) extends AddFileData { + override def addData(source: FileStreamSource): Unit = { + AddOrcFileData.writeToFile(data, src, tmp) + } + } + + object AddOrcFileData { + def apply(seq: Seq[String], src: File, tmp: File): AddOrcFileData = { + AddOrcFileData(seq.toDS().toDF(), src, tmp) + } + + /** Write orc files in a temp dir, and move the individual files to the 'src' dir */ + def writeToFile(df: DataFrame, src: File, tmp: File): Unit = { + val tmpDir = Utils.tempFileWith(new File(tmp, "orc")) + df.write.orc(tmpDir.getCanonicalPath) + src.mkdirs() + tmpDir.listFiles().foreach { f => + f.renameTo(new File(src, s"${f.getName}")) + } + } + } + case class AddParquetFileData(data: DataFrame, src: File, tmp: File) extends AddFileData { override def addData(source: FileStreamSource): Unit = { AddParquetFileData.writeToFile(data, src, tmp) @@ -248,6 +270,42 @@ class FileStreamSourceSuite extends FileStreamSourceTest { } } + // =============== ORC file stream schema tests ================ + + test("FileStreamSource schema: orc, existing files, no schema") { + withTempDir { src => + Seq("a", "b", "c").toDS().as("userColumn").toDF().write + .mode(org.apache.spark.sql.SaveMode.Overwrite) + .orc(src.getCanonicalPath) + + // Without schema inference, should throw error + withSQLConf(SQLConf.STREAMING_SCHEMA_INFERENCE.key -> "false") { + intercept[IllegalArgumentException] { + createFileStreamSourceAndGetSchema( + format = Some("orc"), path = Some(src.getCanonicalPath), schema = None) + } + } + + // With schema inference, should infer correct schema + withSQLConf(SQLConf.STREAMING_SCHEMA_INFERENCE.key -> "true") { + val schema = createFileStreamSourceAndGetSchema( + format = Some("orc"), path = Some(src.getCanonicalPath), schema = None) + assert(schema === new StructType().add("value", StringType)) + } + } + } + + test("FileStreamSource schema: orc, existing files, schema") { + withTempPath { src => + Seq("a", "b", "c").toDS().as("oldUserColumn").toDF() + .write.orc(new File(src, "1").getCanonicalPath) + val userSchema = new StructType().add("userColumn", StringType) + val schema = createFileStreamSourceAndGetSchema( + format = Some("orc"), path = Some(src.getCanonicalPath), schema = Some(userSchema)) + assert(schema === userSchema) + } + } + // =============== Parquet file stream schema tests ================ test("FileStreamSource schema: parquet, existing files, no schema") { @@ -507,6 +565,59 @@ class FileStreamSourceSuite extends FileStreamSourceTest { } } + // =============== ORC file stream tests ================ + + test("read 
from orc files") { + withTempDirs { case (src, tmp) => + val fileStream = createFileStream("orc", src.getCanonicalPath, Some(valueSchema)) + val filtered = fileStream.filter($"value" contains "keep") + + testStream(filtered)( + AddOrcFileData(Seq("drop1", "keep2", "keep3"), src, tmp), + CheckAnswer("keep2", "keep3"), + StopStream, + AddOrcFileData(Seq("drop4", "keep5", "keep6"), src, tmp), + StartStream(), + CheckAnswer("keep2", "keep3", "keep5", "keep6"), + AddOrcFileData(Seq("drop7", "keep8", "keep9"), src, tmp), + CheckAnswer("keep2", "keep3", "keep5", "keep6", "keep8", "keep9") + ) + } + } + + test("read from orc files with changing schema") { + withTempDirs { case (src, tmp) => + withSQLConf(SQLConf.STREAMING_SCHEMA_INFERENCE.key -> "true") { + + // Add a file so that we can infer its schema + AddOrcFileData.writeToFile(Seq("value0").toDF("k"), src, tmp) + + val fileStream = createFileStream("orc", src.getCanonicalPath) + + // FileStreamSource should infer the column "k" + assert(fileStream.schema === StructType(Seq(StructField("k", StringType)))) + + // After creating DF and before starting stream, add data with different schema + // Should not affect the inferred schema any more + AddOrcFileData.writeToFile(Seq(("value1", 0)).toDF("k", "v"), src, tmp) + + testStream(fileStream)( + // Should not pick up column v in the file added before start + AddOrcFileData(Seq("value2").toDF("k"), src, tmp), + CheckAnswer("value0", "value1", "value2"), + + // Should read data in column k, and ignore v + AddOrcFileData(Seq(("value3", 1)).toDF("k", "v"), src, tmp), + CheckAnswer("value0", "value1", "value2", "value3"), + + // Should ignore rows that do not have the necessary k column + AddOrcFileData(Seq("value5").toDF("v"), src, tmp), + CheckAnswer("value0", "value1", "value2", "value3", null) + ) + } + } + } + // =============== Parquet file stream tests ================ test("read from parquet files") { @@ -1023,7 +1134,7 @@ class FileStreamSourceSuite extends FileStreamSourceTest { expectedCompactInterval: Int): Boolean = { import CompactibleFileStreamLog._ - val fileSource = (execution invokePrivate _sources()).head.asInstanceOf[FileStreamSource] + val fileSource = getSourcesFromStreamingQuery(execution).head val metadataLog = fileSource invokePrivate _metadataLog() if (isCompactionBatch(batchId, expectedCompactInterval)) { @@ -1099,8 +1210,7 @@ class FileStreamSourceSuite extends FileStreamSourceTest { CheckAnswer("keep1", "keep2", "keep3"), AssertOnQuery("check getBatch") { execution: StreamExecution => val _sources = PrivateMethod[Seq[Source]]('sources) - val fileSource = - (execution invokePrivate _sources()).head.asInstanceOf[FileStreamSource] + val fileSource = getSourcesFromStreamingQuery(execution).head def verify(startId: Option[Int], endId: Int, expected: String*): Unit = { val start = startId.map(new FileStreamSourceOffset(_)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala index b906393a379ae..de2b51678cea6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala @@ -360,13 +360,13 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest } } - // Values used for testing InputProcessor + // Values used for testing StateStoreUpdater val currentBatchTimestamp = 1000 val currentBatchWatermark = 
1000 val beforeTimeoutThreshold = 999 val afterTimeoutThreshold = 1001 - // Tests for InputProcessor.processNewData() when timeout = NoTimeout + // Tests for StateStoreUpdater.updateStateForKeysWithData() when timeout = NoTimeout for (priorState <- Seq(None, Some(0))) { val priorStateStr = if (priorState.nonEmpty) "prior state set" else "no prior state" val testName = s"NoTimeout - $priorStateStr - " @@ -397,7 +397,7 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest expectedState = None) // should be removed } - // Tests for InputProcessor.processTimedOutState() when timeout != NoTimeout + // Tests for StateStoreUpdater.updateStateForKeysWithData() when timeout != NoTimeout for (priorState <- Seq(None, Some(0))) { for (priorTimeoutTimestamp <- Seq(NO_TIMESTAMP, 1000)) { var testName = "" @@ -444,18 +444,6 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest expectedState = None) // state should be removed } - // Tests with ProcessingTimeTimeout - if (priorState == None) { - testStateUpdateWithData( - s"ProcessingTimeTimeout - $testName - timeout updated without initializing state", - stateUpdates = state => { state.setTimeoutDuration(5000) }, - timeoutConf = ProcessingTimeTimeout, - priorState = None, - priorTimeoutTimestamp = priorTimeoutTimestamp, - expectedState = None, - expectedTimeoutTimestamp = currentBatchTimestamp + 5000) - } - testStateUpdateWithData( s"ProcessingTimeTimeout - $testName - state and timeout duration updated", stateUpdates = @@ -466,36 +454,10 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest expectedState = Some(5), // state should change expectedTimeoutTimestamp = currentBatchTimestamp + 5000) // timestamp should change - testStateUpdateWithData( - s"ProcessingTimeTimeout - $testName - timeout updated after state removed", - stateUpdates = state => { state.remove(); state.setTimeoutDuration(5000) }, - timeoutConf = ProcessingTimeTimeout, - priorState = priorState, - priorTimeoutTimestamp = priorTimeoutTimestamp, - expectedState = None, - expectedTimeoutTimestamp = currentBatchTimestamp + 5000) - - // Tests with EventTimeTimeout - - if (priorState == None) { - testStateUpdateWithData( - s"EventTimeTimeout - $testName - setting timeout without init state not allowed", - stateUpdates = state => { - state.setTimeoutTimestamp(10000) - }, - timeoutConf = EventTimeTimeout, - priorState = None, - priorTimeoutTimestamp = priorTimeoutTimestamp, - expectedState = None, - expectedTimeoutTimestamp = 10000) - } - testStateUpdateWithData( s"EventTimeTimeout - $testName - state and timeout timestamp updated", stateUpdates = - (state: GroupState[Int]) => { - state.update(5); state.setTimeoutTimestamp(5000) - }, + (state: GroupState[Int]) => { state.update(5); state.setTimeoutTimestamp(5000) }, timeoutConf = EventTimeTimeout, priorState = priorState, priorTimeoutTimestamp = priorTimeoutTimestamp, @@ -514,23 +476,50 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest timeoutConf = EventTimeTimeout, priorState = priorState, priorTimeoutTimestamp = priorTimeoutTimestamp, - expectedState = Some(5), // state should change - expectedTimeoutTimestamp = NO_TIMESTAMP) // timestamp should not update - - testStateUpdateWithData( - s"EventTimeTimeout - $testName - setting timeout with state removal not allowed", - stateUpdates = state => { - state.remove(); state.setTimeoutTimestamp(10000) - }, - timeoutConf = EventTimeTimeout, - priorState = priorState, - priorTimeoutTimestamp = priorTimeoutTimestamp, - expectedState = None, - 
expectedTimeoutTimestamp = 10000) + expectedState = Some(5), // state should change + expectedTimeoutTimestamp = NO_TIMESTAMP) // timestamp should not update } } - // Tests for InputProcessor.processTimedOutState() + // Currently disallowed cases for StateStoreUpdater.updateStateForKeysWithData(), + // Try to remove these cases in the future + for (priorTimeoutTimestamp <- Seq(NO_TIMESTAMP, 1000)) { + val testName = + if (priorTimeoutTimestamp != NO_TIMESTAMP) "prior timeout set" else "no prior timeout" + testStateUpdateWithData( + s"ProcessingTimeTimeout - $testName - setting timeout without init state not allowed", + stateUpdates = state => { state.setTimeoutDuration(5000) }, + timeoutConf = ProcessingTimeTimeout, + priorState = None, + priorTimeoutTimestamp = priorTimeoutTimestamp, + expectedException = classOf[IllegalStateException]) + + testStateUpdateWithData( + s"ProcessingTimeTimeout - $testName - setting timeout with state removal not allowed", + stateUpdates = state => { state.remove(); state.setTimeoutDuration(5000) }, + timeoutConf = ProcessingTimeTimeout, + priorState = Some(5), + priorTimeoutTimestamp = priorTimeoutTimestamp, + expectedException = classOf[IllegalStateException]) + + testStateUpdateWithData( + s"EventTimeTimeout - $testName - setting timeout without init state not allowed", + stateUpdates = state => { state.setTimeoutTimestamp(10000) }, + timeoutConf = EventTimeTimeout, + priorState = None, + priorTimeoutTimestamp = priorTimeoutTimestamp, + expectedException = classOf[IllegalStateException]) + + testStateUpdateWithData( + s"EventTimeTimeout - $testName - setting timeout with state removal not allowed", + stateUpdates = state => { state.remove(); state.setTimeoutTimestamp(10000) }, + timeoutConf = EventTimeTimeout, + priorState = Some(5), + priorTimeoutTimestamp = priorTimeoutTimestamp, + expectedException = classOf[IllegalStateException]) + } + + // Tests for StateStoreUpdater.updateStateForTimedOutKeys() val preTimeoutState = Some(5) for (timeoutConf <- Seq(ProcessingTimeTimeout, EventTimeTimeout)) { testStateUpdateWithTimeout( @@ -1034,7 +1023,7 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest if (priorState.isEmpty && priorTimeoutTimestamp != NO_TIMESTAMP) { return // there can be no prior timestamp, when there is no prior state } - test(s"InputProcessor - process new data - $testName") { + test(s"StateStoreUpdater - updates with data - $testName") { val mapGroupsFunc = (key: Int, values: Iterator[Int], state: GroupState[Int]) => { assert(state.hasTimedOut === false, "hasTimedOut not false") assert(values.nonEmpty, "Some value is expected") @@ -1056,7 +1045,7 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest expectedState: Option[Int], expectedTimeoutTimestamp: Long = NO_TIMESTAMP): Unit = { - test(s"InputProcessor - process timed out state - $testName") { + test(s"StateStoreUpdater - updates for timeout - $testName") { val mapGroupsFunc = (key: Int, values: Iterator[Int], state: GroupState[Int]) => { assert(state.hasTimedOut === true, "hasTimedOut not true") assert(values.isEmpty, "values not empty") @@ -1083,20 +1072,21 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest val store = newStateStore() val mapGroupsSparkPlan = newFlatMapGroupsWithStateExec( mapGroupsFunc, timeoutConf, currentBatchTimestamp) - val inputProcessor = new mapGroupsSparkPlan.InputProcessor(store) - val stateManager = mapGroupsSparkPlan.stateManager + val updater = new mapGroupsSparkPlan.StateStoreUpdater(store) val key = intToRow(0) 
// Prepare store with prior state configs - if (priorState.nonEmpty || priorTimeoutTimestamp != NO_TIMESTAMP) { - stateManager.putState(store, key, priorState.orNull, priorTimeoutTimestamp) + if (priorState.nonEmpty) { + val row = updater.getStateRow(priorState.get) + updater.setTimeoutTimestamp(row, priorTimeoutTimestamp) + store.put(key.copy(), row.copy()) } // Call updating function to update state store def callFunction() = { val returnedIter = if (testTimeoutUpdates) { - inputProcessor.processTimedOutState() + updater.updateStateForTimedOutKeys() } else { - inputProcessor.processNewData(Iterator(key)) + updater.updateStateForKeysWithData(Iterator(key)) } returnedIter.size // consume the iterator to force state updates } @@ -1107,11 +1097,15 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest } else { // Call function to update and verify updated state in store callFunction() - val updatedState = stateManager.getState(store, key) - assert(Option(updatedState.stateObj).map(_.toString.toInt) === expectedState, + val updatedStateRow = store.get(key) + assert( + Option(updater.getStateObj(updatedStateRow)).map(_.toString.toInt) === expectedState, "final state not as expected") - assert(updatedState.timeoutTimestamp === expectedTimeoutTimestamp, - "final timeout timestamp not as expected") + if (updatedStateRow != null) { + assert( + updater.getTimeoutTimestamp(updatedStateRow) === expectedTimeoutTimestamp, + "final timeout timestamp not as expected") + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index 3d687d2214e90..c65e5d3dd75c2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -28,7 +28,7 @@ import com.google.common.util.concurrent.UncheckedExecutionException import org.apache.commons.io.FileUtils import org.apache.hadoop.conf.Configuration -import org.apache.spark.SparkContext +import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.plans.logical.Range @@ -77,10 +77,23 @@ class StreamSuite extends StreamTest { } test("StreamingRelation.computeStats") { + withTempDir { dir => + val df = spark.readStream.format("csv").schema(StructType(Seq())).load(dir.getCanonicalPath) + val streamingRelation = df.logicalPlan collect { + case s: StreamingRelation => s + } + assert(streamingRelation.nonEmpty, "cannot find StreamingRelation") + assert( + streamingRelation.head.computeStats.sizeInBytes == + spark.sessionState.conf.defaultSizeInBytes) + } + } + + test("StreamingRelationV2.computeStats") { val streamingRelation = spark.readStream.format("rate").load().logicalPlan collect { - case s: StreamingRelation => s + case s: StreamingRelationV2 => s } - assert(streamingRelation.nonEmpty, "cannot find StreamingRelation") + assert(streamingRelation.nonEmpty, "cannot find StreamingRelationV2") assert( streamingRelation.head.computeStats.sizeInBytes == spark.sessionState.conf.defaultSizeInBytes) } @@ -275,7 +288,7 @@ class StreamSuite extends StreamTest { // Check the latest batchid in the commit log def CheckCommitLogLatestBatchId(expectedId: Int): AssertOnQuery = - AssertOnQuery(_.batchCommitLog.getLatest().get._1 == expectedId, + AssertOnQuery(_.commitLog.getLatest().get._1 == expectedId, s"commitLog's latest
should be $expectedId") // Ensure that there has not been an incremental execution after restart @@ -417,6 +430,37 @@ class StreamSuite extends StreamTest { assert(OutputMode.Update === InternalOutputModes.Update) } + override protected def sparkConf: SparkConf = super.sparkConf + .set("spark.redaction.string.regex", "file:/[\\w_]+") + + test("explain - redaction") { + val replacement = "*********" + + val inputData = MemoryStream[String] + val df = inputData.toDS().map(_ + "foo").groupBy("value").agg(count("*")) + // Test StreamingQuery.display + val q = df.writeStream.queryName("memory_explain").outputMode("complete").format("memory") + .start() + .asInstanceOf[StreamingQueryWrapper] + .streamingQuery + try { + inputData.addData("abc") + q.processAllAvailable() + + val explainWithoutExtended = q.explainInternal(false) + assert(explainWithoutExtended.contains(replacement)) + assert(explainWithoutExtended.contains("StateStoreRestore")) + assert(!explainWithoutExtended.contains("file:/")) + + val explainWithExtended = q.explainInternal(true) + assert(explainWithExtended.contains(replacement)) + assert(explainWithExtended.contains("StateStoreRestore")) + assert(!explainWithoutExtended.contains("file:/")) + } finally { + q.stop() + } + } + test("explain") { val inputData = MemoryStream[String] val df = inputData.toDS().map(_ + "foo").groupBy("value").agg(count("*")) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala index e68fca050571f..d46461fa9bf6d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala @@ -33,11 +33,14 @@ import org.scalatest.exceptions.TestFailedDueToTimeoutException import org.scalatest.time.Span import org.scalatest.time.SpanSugar._ +import org.apache.spark.SparkEnv import org.apache.spark.sql.{Dataset, Encoder, QueryTest, Row} import org.apache.spark.sql.catalyst.encoders.{encoderFor, ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.execution.streaming.continuous.{ContinuousExecution, EpochCoordinatorRef, IncrementAndGetEpoch} +import org.apache.spark.sql.execution.streaming.sources.MemorySinkV2 import org.apache.spark.sql.execution.streaming.state.StateStore import org.apache.spark.sql.streaming.StreamingQueryListener._ import org.apache.spark.sql.test.SharedSQLContext @@ -102,7 +105,7 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be * the active query, and then return the source object the data was added, as well as the * offset of added data. */ - def addData(query: Option[StreamExecution]): (Source, Offset) + def addData(query: Option[StreamExecution]): (BaseStreamingSource, Offset) } /** A trait that can be extended when testing a source. 
*/ @@ -133,6 +136,9 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be } def apply(rows: Row*): CheckAnswerRows = CheckAnswerRows(rows, false, false) + + def apply(globalCheckFunction: Seq[Row] => Unit): CheckAnswerRowsByFunc = + CheckAnswerRowsByFunc(globalCheckFunction, false) } /** @@ -154,6 +160,9 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be } def apply(rows: Row*): CheckAnswerRows = CheckAnswerRows(rows, true, false) + + def apply(globalCheckFunction: Seq[Row] => Unit): CheckAnswerRowsByFunc = + CheckAnswerRowsByFunc(globalCheckFunction, true) } case class CheckAnswerRows(expectedAnswer: Seq[Row], lastOnly: Boolean, isSorted: Boolean) @@ -162,6 +171,19 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be private def operatorName = if (lastOnly) "CheckLastBatch" else "CheckAnswer" } + case class CheckAnswerRowsContains(expectedAnswer: Seq[Row], lastOnly: Boolean = false) + extends StreamAction with StreamMustBeRunning { + override def toString: String = s"$operatorName: ${expectedAnswer.mkString(",")}" + private def operatorName = if (lastOnly) "CheckLastBatch" else "CheckAnswer" + } + + case class CheckAnswerRowsByFunc( + globalCheckFunction: Seq[Row] => Unit, + lastOnly: Boolean) extends StreamAction with StreamMustBeRunning { + override def toString: String = s"$operatorName" + private def operatorName = if (lastOnly) "CheckLastBatchByFunc" else "CheckAnswerByFunc" + } + /** Stops the stream. It must currently be running. */ case object StopStream extends StreamAction with StreamMustBeRunning @@ -225,6 +247,25 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be AssertOnQuery(query => { func(query); true }) } + object AwaitEpoch { + def apply(epoch: Long): AssertOnQuery = + Execute { + case s: ContinuousExecution => s.awaitEpoch(epoch) + case _ => throw new IllegalStateException("microbatch cannot await epoch") + } + } + + object IncrementEpoch { + def apply(): AssertOnQuery = + Execute { + case s: ContinuousExecution => + val newEpoch = EpochCoordinatorRef.get(s.runId.toString, SparkEnv.get) + .askSync[Long](IncrementAndGetEpoch) + s.awaitEpoch(newEpoch - 1) + case _ => throw new IllegalStateException("microbatch cannot increment epoch") + } + } + /** * Executes the specified actions on the given streaming DataFrame and provides helpful * error messages in the case of failures or incorrect answers. 
@@ -234,7 +275,8 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be */ def testStream( _stream: Dataset[_], - outputMode: OutputMode = OutputMode.Append)(actions: StreamAction*): Unit = synchronized { + outputMode: OutputMode = OutputMode.Append, + useV2Sink: Boolean = false)(actions: StreamAction*): Unit = synchronized { import org.apache.spark.sql.streaming.util.StreamManualClock // `synchronized` is added to prevent the user from calling multiple `testStream`s concurrently @@ -247,7 +289,7 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be var currentStream: StreamExecution = null var lastStream: StreamExecution = null val awaiting = new mutable.HashMap[Int, Offset]() // source index -> offset to wait for - val sink = new MemorySink(stream.schema, outputMode) + val sink = if (useV2Sink) new MemorySinkV2 else new MemorySink(stream.schema, outputMode) val resetConfValues = mutable.Map[String, Option[String]]() @volatile @@ -287,14 +329,20 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be if (currentStream != null) currentStream.committedOffsets.toString else "not started" def threadState = - if (currentStream != null && currentStream.microBatchThread.isAlive) "alive" else "dead" - def threadStackTrace = if (currentStream != null && currentStream.microBatchThread.isAlive) { - s"Thread stack trace: ${currentStream.microBatchThread.getStackTrace.mkString("\n")}" - } else { - "" - } + if (currentStream != null && currentStream.queryExecutionThread.isAlive) "alive" else "dead" + + def threadStackTrace = + if (currentStream != null && currentStream.queryExecutionThread.isAlive) { + s"Thread stack trace: ${currentStream.queryExecutionThread.getStackTrace.mkString("\n")}" + } else { + "" + } - def testState = + def testState = { + val sinkDebugString = sink match { + case s: MemorySink => s.toDebugString + case s: MemorySinkV2 => s.toDebugString + } s""" |== Progress == |$testActions @@ -307,12 +355,13 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be |${if (streamThreadDeathCause != null) stackTraceToString(streamThreadDeathCause) else ""} | |== Sink == - |${sink.toDebugString} + |$sinkDebugString | | |== Plan == |${if (currentStream != null) currentStream.lastExecution else ""} """.stripMargin + } def verify(condition: => Boolean, message: String): Unit = { if (!condition) { @@ -352,6 +401,33 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be """.stripMargin) } + def fetchStreamAnswer(currentStream: StreamExecution, lastOnly: Boolean) = { + verify(currentStream != null, "stream not running") + // Get the map of source index to the current source objects + val indexToSource = currentStream + .logicalPlan + .collect { case StreamingExecutionRelation(s, _) => s } + .zipWithIndex + .map(_.swap) + .toMap + + // Block until all data added has been processed for all the sources + awaiting.foreach { case (sourceIndex, offset) => + failAfter(streamingTimeout) { + currentStream.awaitOffset(indexToSource(sourceIndex), offset) + } + } + + val (latestBatchData, allData) = sink match { + case s: MemorySink => (s.latestBatchData, s.allData) + case s: MemorySinkV2 => (s.latestBatchData, s.allData) + } + try if (lastOnly) latestBatchData else allData catch { + case e: Exception => + failTest("Exception while getting data from sink", e) + } + } + var manualClockExpectedTime = -1L val defaultCheckpointLocation = Utils.createTempDir(namePrefix =
"streaming.metadata").getCanonicalPath @@ -386,6 +462,7 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be None, Some(metadataRoot), stream, + Map(), sink, outputMode, trigger = trigger, @@ -424,7 +501,7 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be verify(currentStream != null, "can not stop a stream that is not running") try failAfter(streamingTimeout) { currentStream.stop() - verify(!currentStream.microBatchThread.isAlive, + verify(!currentStream.queryExecutionThread.isAlive, s"microbatch thread not stopped") verify(!currentStream.isActive, "query.isActive() is false even after stopping") @@ -450,7 +527,7 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be currentStream.awaitTermination() } eventually("microbatch thread not stopped after termination with failure") { - assert(!currentStream.microBatchThread.isAlive) + assert(!currentStream.queryExecutionThread.isAlive) } verify(currentStream.exception === Some(thrownException), s"incorrect exception returned by query.exception()") @@ -552,29 +629,23 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be e.runAction() case CheckAnswerRows(expectedAnswer, lastOnly, isSorted) => - verify(currentStream != null, "stream not running") - // Get the map of source index to the current source objects - val indexToSource = currentStream - .logicalPlan - .collect { case StreamingExecutionRelation(s, _) => s } - .zipWithIndex - .map(_.swap) - .toMap - - // Block until all data added has been processed for all the source - awaiting.foreach { case (sourceIndex, offset) => - failAfter(streamingTimeout) { - currentStream.awaitOffset(indexToSource(sourceIndex), offset) - } + val sparkAnswer = fetchStreamAnswer(currentStream, lastOnly) + QueryTest.sameRows(expectedAnswer, sparkAnswer, isSorted).foreach { + error => failTest(error) } - val sparkAnswer = try if (lastOnly) sink.latestBatchData else sink.allData catch { - case e: Exception => - failTest("Exception while getting data from sink", e) + case CheckAnswerRowsContains(expectedAnswer, lastOnly) => + val sparkAnswer = fetchStreamAnswer(currentStream, lastOnly) + QueryTest.includesRows(expectedAnswer, sparkAnswer).foreach { + error => failTest(error) } - QueryTest.sameRows(expectedAnswer, sparkAnswer, isSorted).foreach { - error => failTest(error) + case CheckAnswerRowsByFunc(globalCheckFunction, lastOnly) => + val sparkAnswer = fetchStreamAnswer(currentStream, lastOnly) + try { + globalCheckFunction(sparkAnswer) + } catch { + case e: Throwable => failTest(e.toString) } } pos += 1 @@ -588,7 +659,7 @@ trait StreamTest extends QueryTest with SharedSQLContext with TimeLimits with Be case e: org.scalatest.exceptions.TestFailedDueToTimeoutException => failTest("Timed out waiting for stream", e) } finally { - if (currentStream != null && currentStream.microBatchThread.isAlive) { + if (currentStream != null && currentStream.queryExecutionThread.isAlive) { currentStream.stop() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala index 1b4d8556f6ae5..97e065193fd05 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala @@ -299,7 +299,7 @@ class StreamingAggregationSuite extends StateStoreMetricsTest StopStream, 
AssertOnQuery { q => // clear the sink q.sink.asInstanceOf[MemorySink].clear() - q.batchCommitLog.purge(3) + q.commitLog.purge(3) // advance by a minute i.e., 90 seconds total clock.advance(60 * 1000L) true @@ -351,7 +351,7 @@ class StreamingAggregationSuite extends StateStoreMetricsTest StopStream, AssertOnQuery { q => // clear the sink q.sink.asInstanceOf[MemorySink].clear() - q.batchCommitLog.purge(3) + q.commitLog.purge(3) // advance by 60 days i.e., 90 days total clock.advance(DateTimeUtils.MILLIS_PER_DAY * 60) true diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala index cc693909270f8..76201c63a2701 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala @@ -37,7 +37,6 @@ import org.apache.spark.sql.streaming.util.{BlockingSource, MockSourceProvider, import org.apache.spark.sql.types.StructType import org.apache.spark.util.ManualClock - class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging with MockitoSugar { import AwaitTerminationTester._ @@ -172,12 +171,12 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi StopStream, // clears out StreamTest state AssertOnQuery { q => // both commit log and offset log contain the same (latest) batch id - q.batchCommitLog.getLatest().map(_._1).getOrElse(-1L) == + q.commitLog.getLatest().map(_._1).getOrElse(-1L) == q.offsetLog.getLatest().map(_._1).getOrElse(-2L) }, AssertOnQuery { q => // blow away commit log and sink result - q.batchCommitLog.purge(1) + q.commitLog.purge(1) q.sink.asInstanceOf[MemorySink].clear() true }, @@ -425,6 +424,29 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi } } + test("SPARK-22975: MetricsReporter defaults when there was no progress reported") { + withSQLConf("spark.sql.streaming.metricsEnabled" -> "true") { + BlockingSource.latch = new CountDownLatch(1) + withTempDir { tempDir => + val sq = spark.readStream + .format("org.apache.spark.sql.streaming.util.BlockingSource") + .load() + .writeStream + .format("org.apache.spark.sql.streaming.util.BlockingSource") + .option("checkpointLocation", tempDir.toString) + .start() + .asInstanceOf[StreamingQueryWrapper] + .streamingQuery + + val gauges = sq.streamMetrics.metricRegistry.getGauges + assert(gauges.get("latency").getValue.asInstanceOf[Long] == 0) + assert(gauges.get("processingRate-total").getValue.asInstanceOf[Double] == 0.0) + assert(gauges.get("inputRate-total").getValue.asInstanceOf[Double] == 0.0) + sq.stop() + } + } + } + test("input row calculation with mixed batch and streaming sources") { val streamingTriggerDF = spark.createDataset(1 to 10).toDF val streamingInputDF = createSingleTriggerStreamingDF(streamingTriggerDF).toDF("value") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSymmetricHashJoinHelperSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSymmetricHashJoinHelperSuite.scala index 2a854e37bf0df..69b7154895341 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSymmetricHashJoinHelperSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSymmetricHashJoinHelperSuite.scala @@ -18,10 +18,8 @@ package org.apache.spark.sql.streaming import org.apache.spark.sql.Column -import 
org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer import org.apache.spark.sql.catalyst.expressions.AttributeReference -import org.apache.spark.sql.execution.{LeafExecNode, LocalTableScanExec, SparkPlan} -import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec +import org.apache.spark.sql.execution.LocalTableScanExec import org.apache.spark.sql.execution.streaming.StreamingSymmetricHashJoinHelper.JoinConditionSplitPredicates import org.apache.spark.sql.types._ @@ -95,19 +93,17 @@ class StreamingSymmetricHashJoinHelperSuite extends StreamTest { } test("conjuncts after nondeterministic") { - // All conjuncts after a nondeterministic conjunct shouldn't be split because they don't - // commute across it. val predicate = - (rand() > lit(0) + (rand(9) > lit(0) && leftColA > leftColB && rightColC > rightColD && leftColA === rightColC && lit(1) === lit(1)).expr val split = JoinConditionSplitPredicates(Some(predicate), left, right) - assert(split.leftSideOnly.isEmpty) - assert(split.rightSideOnly.isEmpty) - assert(split.bothSides.contains(predicate)) + assert(split.leftSideOnly.contains((leftColA > leftColB && lit(1) === lit(1)).expr)) + assert(split.rightSideOnly.contains((rightColC > rightColD && lit(1) === lit(1)).expr)) + assert(split.bothSides.contains((leftColA === rightColC && rand(9) > lit(0)).expr)) assert(split.full.contains(predicate)) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala new file mode 100644 index 0000000000000..9562c10feafe9 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -0,0 +1,316 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.streaming.continuous + +import java.io.{File, InterruptedIOException, IOException, UncheckedIOException} +import java.nio.channels.ClosedByInterruptException +import java.util.concurrent.{CountDownLatch, ExecutionException, TimeoutException, TimeUnit} + +import scala.reflect.ClassTag +import scala.util.control.ControlThrowable + +import com.google.common.util.concurrent.UncheckedExecutionException +import org.apache.commons.io.FileUtils +import org.apache.hadoop.conf.Configuration + +import org.apache.spark.{SparkContext, SparkEnv} +import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.plans.logical.Range +import org.apache.spark.sql.catalyst.streaming.InternalOutputModes +import org.apache.spark.sql.execution.command.ExplainCommand +import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2ScanExec, WriteToDataSourceV2Exec} +import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.execution.streaming.continuous._ +import org.apache.spark.sql.execution.streaming.sources.MemorySinkV2 +import org.apache.spark.sql.execution.streaming.state.{StateStore, StateStoreConf, StateStoreId, StateStoreProvider} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.sources.StreamSourceProvider +import org.apache.spark.sql.streaming.{StreamTest, Trigger} +import org.apache.spark.sql.streaming.util.StreamManualClock +import org.apache.spark.sql.test.TestSparkSession +import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils + +class ContinuousSuiteBase extends StreamTest { + // We need more than the default local[2] to be able to schedule all partitions simultaneously. + override protected def createSparkSession = new TestSparkSession( + new SparkContext( + "local[10]", + "continuous-stream-test-sql-context", + sparkConf.set("spark.sql.testkey", "true"))) + + protected def waitForRateSourceTriggers(query: StreamExecution, numTriggers: Int): Unit = { + query match { + case s: ContinuousExecution => + assert(numTriggers >= 2, "must wait for at least 2 triggers to ensure query is initialized") + val reader = s.lastExecution.executedPlan.collectFirst { + case DataSourceV2ScanExec(_, r: RateStreamContinuousReader) => r + }.get + + val deltaMs = numTriggers * 1000 + 300 + while (System.currentTimeMillis < reader.creationTime + deltaMs) { + Thread.sleep(reader.creationTime + deltaMs - System.currentTimeMillis) + } + } + } + + // A continuous trigger that will only fire the initial time for the duration of a test. + // This allows clean testing with manual epoch advancement. 
+ protected val longContinuousTrigger = Trigger.Continuous("1 hour") +} + +class ContinuousSuite extends ContinuousSuiteBase { + import testImplicits._ + + test("basic rate source") { + val df = spark.readStream + .format("rate") + .option("numPartitions", "5") + .option("rowsPerSecond", "5") + .load() + .select('value) + + testStream(df, useV2Sink = true)( + StartStream(longContinuousTrigger), + AwaitEpoch(0), + Execute(waitForRateSourceTriggers(_, 2)), + IncrementEpoch(), + CheckAnswerRowsContains(scala.Range(0, 10).map(Row(_))), + StopStream, + StartStream(longContinuousTrigger), + AwaitEpoch(2), + Execute(waitForRateSourceTriggers(_, 2)), + IncrementEpoch(), + CheckAnswerRowsContains(scala.Range(0, 20).map(Row(_))), + StopStream) + } + + test("map") { + val df = spark.readStream + .format("rate") + .option("numPartitions", "5") + .option("rowsPerSecond", "5") + .load() + .select('value) + .map(r => r.getLong(0) * 2) + + testStream(df, useV2Sink = true)( + StartStream(longContinuousTrigger), + AwaitEpoch(0), + Execute(waitForRateSourceTriggers(_, 2)), + IncrementEpoch(), + Execute(waitForRateSourceTriggers(_, 4)), + IncrementEpoch(), + CheckAnswerRowsContains(scala.Range(0, 40, 2).map(Row(_)))) + } + + test("flatMap") { + val df = spark.readStream + .format("rate") + .option("numPartitions", "5") + .option("rowsPerSecond", "5") + .load() + .select('value) + .flatMap(r => Seq(0, r.getLong(0), r.getLong(0) * 2)) + + testStream(df, useV2Sink = true)( + StartStream(longContinuousTrigger), + AwaitEpoch(0), + Execute(waitForRateSourceTriggers(_, 2)), + IncrementEpoch(), + Execute(waitForRateSourceTriggers(_, 4)), + IncrementEpoch(), + CheckAnswerRowsContains(scala.Range(0, 20).flatMap(n => Seq(0, n, n * 2)).map(Row(_)))) + } + + test("filter") { + val df = spark.readStream + .format("rate") + .option("numPartitions", "5") + .option("rowsPerSecond", "5") + .load() + .select('value) + .where('value > 5) + + testStream(df, useV2Sink = true)( + StartStream(longContinuousTrigger), + AwaitEpoch(0), + Execute(waitForRateSourceTriggers(_, 2)), + IncrementEpoch(), + Execute(waitForRateSourceTriggers(_, 4)), + IncrementEpoch(), + CheckAnswerRowsContains(scala.Range(6, 20).map(Row(_)))) + } + + test("deduplicate") { + val df = spark.readStream + .format("rate") + .option("numPartitions", "5") + .option("rowsPerSecond", "5") + .load() + .select('value) + .dropDuplicates() + + val except = intercept[AnalysisException] { + testStream(df, useV2Sink = true)(StartStream(longContinuousTrigger)) + } + + assert(except.message.contains( + "Continuous processing does not support Deduplicate operations.")) + } + + test("timestamp") { + val df = spark.readStream + .format("rate") + .option("numPartitions", "5") + .option("rowsPerSecond", "5") + .load() + .select(current_timestamp()) + + val except = intercept[AnalysisException] { + testStream(df, useV2Sink = true)(StartStream(longContinuousTrigger)) + } + + assert(except.message.contains( + "Continuous processing does not support current time operations.")) + } + + test("repeatedly restart") { + val df = spark.readStream + .format("rate") + .option("numPartitions", "5") + .option("rowsPerSecond", "5") + .load() + .select('value) + + testStream(df, useV2Sink = true)( + StartStream(longContinuousTrigger), + AwaitEpoch(0), + Execute(waitForRateSourceTriggers(_, 2)), + IncrementEpoch(), + CheckAnswerRowsContains(scala.Range(0, 10).map(Row(_))), + StopStream, + StartStream(longContinuousTrigger), + StopStream, + StartStream(longContinuousTrigger), + StopStream, + 
StartStream(longContinuousTrigger), + AwaitEpoch(2), + Execute(waitForRateSourceTriggers(_, 2)), + IncrementEpoch(), + CheckAnswerRowsContains(scala.Range(0, 20).map(Row(_))), + StopStream) + } + + test("query without test harness") { + val df = spark.readStream + .format("rate") + .option("numPartitions", "2") + .option("rowsPerSecond", "2") + .load() + .select('value) + val query = df.writeStream + .format("memory") + .queryName("noharness") + .trigger(Trigger.Continuous(100)) + .start() + val continuousExecution = + query.asInstanceOf[StreamingQueryWrapper].streamingQuery.asInstanceOf[ContinuousExecution] + continuousExecution.awaitEpoch(0) + waitForRateSourceTriggers(continuousExecution, 2) + query.stop() + + val results = spark.read.table("noharness").collect() + assert(Set(0, 1, 2, 3).map(Row(_)).subsetOf(results.toSet)) + } +} + +class ContinuousStressSuite extends ContinuousSuiteBase { + import testImplicits._ + + test("only one epoch") { + val df = spark.readStream + .format("rate") + .option("numPartitions", "5") + .option("rowsPerSecond", "500") + .load() + .select('value) + + testStream(df, useV2Sink = true)( + StartStream(longContinuousTrigger), + AwaitEpoch(0), + Execute(waitForRateSourceTriggers(_, 201)), + IncrementEpoch(), + Execute { query => + val data = query.sink.asInstanceOf[MemorySinkV2].allData + val vals = data.map(_.getLong(0)).toSet + assert(scala.Range(0, 25000).forall { i => + vals.contains(i) + }) + }) + } + + test("automatic epoch advancement") { + val df = spark.readStream + .format("rate") + .option("numPartitions", "5") + .option("rowsPerSecond", "500") + .load() + .select('value) + + testStream(df, useV2Sink = true)( + StartStream(Trigger.Continuous(2012)), + AwaitEpoch(0), + Execute(waitForRateSourceTriggers(_, 201)), + IncrementEpoch(), + CheckAnswerRowsContains(scala.Range(0, 25000).map(Row(_)))) + } + + test("restarts") { + val df = spark.readStream + .format("rate") + .option("numPartitions", "5") + .option("rowsPerSecond", "500") + .load() + .select('value) + + testStream(df, useV2Sink = true)( + StartStream(Trigger.Continuous(2012)), + AwaitEpoch(10), + StopStream, + StartStream(Trigger.Continuous(2012)), + AwaitEpoch(20), + StopStream, + StartStream(Trigger.Continuous(2012)), + AwaitEpoch(21), + StopStream, + StartStream(Trigger.Continuous(2012)), + AwaitEpoch(22), + StopStream, + StartStream(Trigger.Continuous(2012)), + AwaitEpoch(25), + StopStream, + StartStream(Trigger.Continuous(2012)), + StopStream, + StartStream(Trigger.Continuous(2012)), + AwaitEpoch(50), + CheckAnswerRowsContains(scala.Range(0, 25000).map(Row(_)))) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala index a5d7e6257a6df..8c9bb7d56a35f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala @@ -155,7 +155,6 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be } } - test("resolve default source") { spark.read .format("org.apache.spark.sql.test") @@ -478,42 +477,56 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be spark.read.schema(userSchema).parquet(Seq(dir, dir): _*), expData ++ expData, userSchema) } - /** - * This only tests whether API compiles, but does not run it as orc() - * cannot be run without Hive classes. 
- */ - ignore("orc - API") { - // Reader, with user specified schema - // Refer to csv-specific test suites for behavior without user specified schema - spark.read.schema(userSchema).orc() - spark.read.schema(userSchema).orc(dir) - spark.read.schema(userSchema).orc(dir, dir, dir) - spark.read.schema(userSchema).orc(Seq(dir, dir): _*) - Option(dir).map(spark.read.schema(userSchema).orc) + test("orc - API and behavior regarding schema") { + withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> "native") { + // Writer + spark.createDataset(data).toDF("str").write.mode(SaveMode.Overwrite).orc(dir) + val df = spark.read.orc(dir) + checkAnswer(df, spark.createDataset(data).toDF()) + val schema = df.schema - // Writer - spark.range(10).write.orc(dir) + // Reader, without user specified schema + intercept[AnalysisException] { + testRead(spark.read.orc(), Seq.empty, schema) + } + testRead(spark.read.orc(dir), data, schema) + testRead(spark.read.orc(dir, dir), data ++ data, schema) + testRead(spark.read.orc(Seq(dir, dir): _*), data ++ data, schema) + // Test explicit calls to single arg method - SPARK-16009 + testRead(Option(dir).map(spark.read.orc).get, data, schema) + + // Reader, with user specified schema: data should be nulls, as the schema in the file + // differs from the user schema + val expData = Seq[String](null, null, null) + testRead(spark.read.schema(userSchema).orc(), Seq.empty, userSchema) + testRead(spark.read.schema(userSchema).orc(dir), expData, userSchema) + testRead(spark.read.schema(userSchema).orc(dir, dir), expData ++ expData, userSchema) + testRead( + spark.read.schema(userSchema).orc(Seq(dir, dir): _*), expData ++ expData, userSchema) + } } test("column nullability and comment - write and then read") { - Seq("json", "parquet", "csv").foreach { format => - val schema = StructType( - StructField("cl1", IntegerType, nullable = false).withComment("test") :: - StructField("cl2", IntegerType, nullable = true) :: - StructField("cl3", IntegerType, nullable = true) :: Nil) - val row = Row(3, null, 4) - val df = spark.createDataFrame(sparkContext.parallelize(row :: Nil), schema) - - val tableName = "tab" - withTable(tableName) { - df.write.format(format).mode("overwrite").saveAsTable(tableName) - // Verify the DDL command result: DESCRIBE TABLE - checkAnswer( - sql(s"desc $tableName").select("col_name", "comment").where($"comment" === "test"), - Row("cl1", "test") :: Nil) - // Verify the schema - val expectedFields = schema.fields.map(f => f.copy(nullable = true)) - assert(spark.table(tableName).schema == schema.copy(fields = expectedFields)) + withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> "native") { + Seq("json", "orc", "parquet", "csv").foreach { format => + val schema = StructType( + StructField("cl1", IntegerType, nullable = false).withComment("test") :: + StructField("cl2", IntegerType, nullable = true) :: + StructField("cl3", IntegerType, nullable = true) :: Nil) + val row = Row(3, null, 4) + val df = spark.createDataFrame(sparkContext.parallelize(row :: Nil), schema) + + val tableName = "tab" + withTable(tableName) { + df.write.format(format).mode("overwrite").saveAsTable(tableName) + // Verify the DDL command result: DESCRIBE TABLE + checkAnswer( + sql(s"desc $tableName").select("col_name", "comment").where($"comment" === "test"), + Row("cl1", "test") :: Nil) + // Verify the schema + val expectedFields = schema.fields.map(f => f.copy(nullable = true)) + assert(spark.table(tableName).schema == schema.copy(fields = expectedFields)) + } } } } diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala index b4248b74f50ab..bc4a120f7042f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala @@ -113,7 +113,7 @@ private[sql] trait SQLTestUtils extends SparkFunSuite with SQLTestUtilsBase with if (thread.isAlive) { thread.interrupt() // If this interrupt does not work, then this thread is most likely running something that - // is not interruptible. There is not much point to wait for the thread to termniate, and + // is not interruptible. There is not much point in waiting for the thread to terminate, and // we rather let the JVM terminate the thread on exit. fail( s"Test '$name' running on o.a.s.util.UninterruptibleThread timed out after" + @@ -254,13 +254,26 @@ private[sql] trait SQLTestUtilsBase } /** - * Drops temporary table `tableName` after calling `f`. + * Drops temporary views `viewNames` after calling `f`. */ - protected def withTempView(tableNames: String*)(f: => Unit): Unit = { + protected def withTempView(viewNames: String*)(f: => Unit): Unit = { try f finally { // If the test failed part way, we don't want to mask the failure by failing to remove - // temp tables that never got created. - try tableNames.foreach(spark.catalog.dropTempView) catch { + // temp views that never got created. + try viewNames.foreach(spark.catalog.dropTempView) catch { + case _: NoSuchTableException => + } + } + } + + /** + * Drops global temporary views `viewNames` after calling `f`. + */ + protected def withGlobalTempView(viewNames: String*)(f: => Unit): Unit = { + try f finally { + // If the test failed part way, we don't want to mask the failure by failing to remove + // global temp views that never got created. + try viewNames.foreach(spark.catalog.dropGlobalTempView) catch { case _: NoSuchTableException => } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala index 959edf9a49371..4286e8a6ca2c8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.internal.{SessionState, SessionStateBuilder, SQLConf /** * A special `SparkSession` prepared for testing.
*/ -private[sql] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) { self => +private[spark] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) { self => def this(sparkConf: SparkConf) { this(new SparkContext("local[2]", "test-sql-context", sparkConf.set("spark.sql.testkey", "true"))) diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 3135a8a275dae..9f247f9224c75 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.3.0-SNAPSHOT + 2.4.0-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java index 3f2de108f069a..6740d3bb59dc3 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.ql.processors.CommandProcessor; import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory; +import org.apache.hadoop.hive.ql.session.OperationLog; import org.apache.hive.service.cli.HiveSQLException; import org.apache.hive.service.cli.OperationType; import org.apache.hive.service.cli.session.HiveSession; @@ -67,4 +68,16 @@ protected void setConfOverlay(Map confOverlay) { this.confOverlay = confOverlay; } } + + protected void registerCurrentOperationLog() { + if (isOperationLogEnabled) { + if (operationLog == null) { + LOG.warn("Failed to get current OperationLog object of Operation: " + + getHandle().getHandleIdentifier()); + isOperationLogEnabled = false; + return; + } + OperationLog.setCurrentOperationLog(operationLog); + } + } } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java index 5014cedd870b6..70c27948de61b 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java @@ -42,7 +42,6 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.VariableSubstitution; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.OperationLog; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDe; @@ -274,18 +273,6 @@ private Hive getSessionHive() throws HiveSQLException { } } - private void registerCurrentOperationLog() { - if (isOperationLogEnabled) { - if (operationLog == null) { - LOG.warn("Failed to get current OperationLog object of Operation: " + - getHandle().getHandleIdentifier()); - isOperationLogEnabled = false; - return; - } - OperationLog.setCurrentOperationLog(operationLog); - } - } - private void cleanup(OperationState state) throws HiveSQLException { setState(state); if (shouldRunAsync()) { diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index f5191fa9132bd..664bc20601eaa 100644 --- 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -170,6 +170,7 @@ private[hive] class SparkExecuteStatementOperation( override def run(): Unit = { val doAsAction = new PrivilegedExceptionAction[Unit]() { override def run(): Unit = { + registerCurrentOperationLog() try { execute() } catch { diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 497a252b0e930..6efcc4149435a 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.3.0-SNAPSHOT + 2.4.0-SNAPSHOT ../../pom.xml diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 44e680dbd2f93..632e3e0c4c3f9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -668,7 +668,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat val schema = restoreTableMetadata(rawTable).schema // convert table statistics to properties so that we can persist them through hive client - var statsProperties = + val statsProperties = if (stats.isDefined) { statsToProperties(stats.get, schema) } else { @@ -1098,14 +1098,14 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat val schema = restoreTableMetadata(rawTable).schema // convert partition statistics to properties so that we can persist them through hive api - val withStatsProps = lowerCasedParts.map(p => { + val withStatsProps = lowerCasedParts.map { p => if (p.stats.isDefined) { val statsProperties = statsToProperties(p.stats.get, schema) p.copy(parameters = p.parameters ++ statsProperties) } else { p } - }) + } // Note: Before altering table partitions in Hive, you *must* set the current database // to the one that contains the table of interest. Otherwise you will end up with the diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index 92cb4ef11c9e3..dc92ad3b0c1ac 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -42,7 +42,7 @@ class HiveSessionStateBuilder(session: SparkSession, parentState: Option[Session * Create a Hive aware resource loader. 
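Context for the one-line change below: Hive's OperationLog is tracked in a thread-local, so it has to be registered on the thread that actually executes the statement (inside the PrivilegedExceptionAction), not on the thread that constructed the operation. A minimal, Hive-independent sketch of why that matters:

object CurrentLog {
  private val holder = new ThreadLocal[String]
  def set(v: String): Unit = holder.set(v)
  def get: Option[String] = Option(holder.get)
}

val worker = new Thread(new Runnable {
  override def run(): Unit = {
    CurrentLog.set("worker-log")                  // visible only on this thread
    assert(CurrentLog.get.contains("worker-log"))
  }
})
worker.start(); worker.join()
assert(CurrentLog.get.isEmpty)                    // the spawning thread never sees it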
*/ override protected lazy val resourceLoader: HiveSessionResourceLoader = { - val client: HiveClient = externalCatalog.client.newSession() + val client: HiveClient = externalCatalog.client new HiveSessionResourceLoader(session, client) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala index 9e9894803ce25..11afe1af32809 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala @@ -50,7 +50,7 @@ private[hive] object HiveShim { val HIVE_GENERIC_UDF_MACRO_CLS = "org.apache.hadoop.hive.ql.udf.generic.GenericUDFMacro" /* - * This function in hive-0.13 become private, but we have to do this to walkaround hive bug + * This function became private in hive-0.13, but we have to do this to work around a hive bug */ private def appendReadColumnNames(conf: Configuration, cols: Seq[String]) { val old: String = conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "") diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index ee1f6ee173063..ab857b9055720 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -34,7 +34,6 @@ import org.apache.spark.sql.execution.command.{CreateTableCommand, DDLUtils} import org.apache.spark.sql.execution.datasources.{CreateTable, LogicalRelation} import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat, ParquetOptions} import org.apache.spark.sql.hive.execution._ -import org.apache.spark.sql.hive.orc.OrcFileFormat import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} @@ -88,7 +87,7 @@ class ResolveHiveSerdeTable(session: SparkSession) extends Rule[LogicalPlan] { } } - override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { + override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case c @ CreateTable(t, _, query) if DDLUtils.isHiveTable(t) => // Finds the database name if the name does not exist. val dbName = t.identifier.database.getOrElse(session.catalog.currentDatabase) @@ -115,7 +114,7 @@ class ResolveHiveSerdeTable(session: SparkSession) extends Rule[LogicalPlan] { } class DetermineTableStats(session: SparkSession) extends Rule[LogicalPlan] { - override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { + override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case relation: HiveTableRelation if DDLUtils.isHiveTable(relation.tableMeta) && relation.tableMeta.stats.isEmpty => val table = relation.tableMeta @@ -146,10 +145,11 @@ class DetermineTableStats(session: SparkSession) extends Rule[LogicalPlan] { * `PreprocessTableInsertion`. 
*/ object HiveAnalysis extends Rule[LogicalPlan] { - override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { + override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case InsertIntoTable(r: HiveTableRelation, partSpec, query, overwrite, ifPartitionNotExists) if DDLUtils.isHiveTable(r.tableMeta) => - InsertIntoHiveTable(r.tableMeta, partSpec, query, overwrite, ifPartitionNotExists) + InsertIntoHiveTable(r.tableMeta, partSpec, query, overwrite, + ifPartitionNotExists, query.output) case CreateTable(tableDesc, mode, None) if DDLUtils.isHiveTable(tableDesc) => DDLUtils.checkDataColNames(tableDesc) @@ -164,7 +164,7 @@ object HiveAnalysis extends Rule[LogicalPlan] { val outputPath = new Path(storage.locationUri.get) if (overwrite) DDLUtils.verifyNotReadPath(child, outputPath) - InsertIntoHiveDirCommand(isLocal, storage, child, overwrite) + InsertIntoHiveDirCommand(isLocal, storage, child, overwrite, child.output) } } @@ -195,8 +195,19 @@ case class RelationConversions( .convertToLogicalRelation(relation, options, classOf[ParquetFileFormat], "parquet") } else { val options = relation.tableMeta.storage.properties - sessionCatalog.metastoreCatalog - .convertToLogicalRelation(relation, options, classOf[OrcFileFormat], "orc") + if (conf.getConf(SQLConf.ORC_IMPLEMENTATION) == "native") { + sessionCatalog.metastoreCatalog.convertToLogicalRelation( + relation, + options, + classOf[org.apache.spark.sql.execution.datasources.orc.OrcFileFormat], + "orc") + } else { + sessionCatalog.metastoreCatalog.convertToLogicalRelation( + relation, + options, + classOf[org.apache.spark.sql.hive.orc.OrcFileFormat], + "orc") + } } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index f5e6720f6a510..c7717d70c996f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -47,7 +47,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf._ import org.apache.spark.sql.internal.StaticSQLConf.{CATALOG_IMPLEMENTATION, WAREHOUSE_PATH} import org.apache.spark.sql.types._ -import org.apache.spark.util.Utils +import org.apache.spark.util.{ChildFirstURLClassLoader, Utils} private[spark] object HiveUtils extends Logging { @@ -109,7 +109,7 @@ private[spark] object HiveUtils extends Logging { .doc("When set to true, the built-in ORC reader and writer are used to process " + "ORC tables created by using the HiveQL syntax, instead of Hive serde.") .booleanConf - .createWithDefault(false) + .createWithDefault(true) val HIVE_METASTORE_SHARED_PREFIXES = buildConf("spark.sql.hive.metastore.sharedPrefixes") .doc("A comma separated list of class prefixes that should be loaded using the classloader " + @@ -312,6 +312,8 @@ private[spark] object HiveUtils extends Logging { // starting from the given classLoader. 
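Background for the allJars change at the start of the next hunk: Spark's ChildFirstURLClassLoader keeps its real parent in a private field and reports getParent() as null, so the plain URLClassLoader walk would stop there and drop every ancestor jar. A simplified model of the fixed walk (the stand-in class below mimics that getParent() behavior; the real code falls back to Utils.getSparkClassLoader):

import java.net.{URL, URLClassLoader}

// Stand-in for ChildFirstURLClassLoader: the parent is hidden from getParent().
class ChildFirst(urls: Array[URL]) extends URLClassLoader(urls, null)

// `fallback` is assumed to be an ordinary classloader chain, so the walk terminates.
def allJars(cl: ClassLoader, fallback: ClassLoader): Array[URL] = cl match {
  case null => Array.empty[URL]
  case child: ChildFirst => child.getURLs ++ allJars(fallback, fallback)
  case u: URLClassLoader => u.getURLs ++ allJars(u.getParent, fallback)
  case other => allJars(other.getParent, fallback)
}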
def allJars(classLoader: ClassLoader): Array[URL] = classLoader match { case null => Array.empty[URL] + case childFirst: ChildFirstURLClassLoader => + childFirst.getURLs() ++ allJars(Utils.getSparkClassLoader) case urlClassLoader: URLClassLoader => urlClassLoader.getURLs ++ allJars(urlClassLoader.getParent) case other => allJars(other.getParent) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index b5a5890d47b03..4b923f5235a90 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -186,7 +186,7 @@ private[hive] class HiveClientImpl( /** Returns the configuration for the current session. */ def conf: HiveConf = state.getConf - private val userName = state.getAuthenticator.getUserName + private val userName = conf.getUser override def getConf(key: String, defaultValue: String): String = { conf.get(key, defaultValue) @@ -330,7 +330,7 @@ private[hive] class HiveClientImpl( Option(client.getDatabase(dbName)).map { d => CatalogDatabase( name = d.getName, - description = d.getDescription, + description = Option(d.getDescription).getOrElse(""), locationUri = CatalogUtils.stringToURI(d.getLocationUri), properties = Option(d.getParameters).map(_.asScala.toMap).orNull) }.getOrElse(throw new NoSuchDatabaseException(dbName)) @@ -414,30 +414,6 @@ private[hive] class HiveClientImpl( } val comment = properties.get("comment") - // Here we are reading statistics from Hive. - // Note that this statistics could be overridden by Spark's statistics if that's available. - val totalSize = properties.get(StatsSetupConst.TOTAL_SIZE).map(BigInt(_)) - val rawDataSize = properties.get(StatsSetupConst.RAW_DATA_SIZE).map(BigInt(_)) - val rowCount = properties.get(StatsSetupConst.ROW_COUNT).map(BigInt(_)).filter(_ >= 0) - // TODO: check if this estimate is valid for tables after partition pruning. - // NOTE: getting `totalSize` directly from params is kind of hacky, but this should be - // relatively cheap if parameters for the table are populated into the metastore. - // Currently, only totalSize, rawDataSize, and rowCount are used to build the field `stats` - // TODO: stats should include all the other two fields (`numFiles` and `numPartitions`). - // (see StatsSetupConst in Hive) - val stats = - // When table is external, `totalSize` is always zero, which will influence join strategy - // so when `totalSize` is zero, use `rawDataSize` instead. When `rawDataSize` is also zero, - // return None. Later, we will use the other ways to estimate the statistics. - if (totalSize.isDefined && totalSize.get > 0L) { - Some(CatalogStatistics(sizeInBytes = totalSize.get, rowCount = rowCount)) - } else if (rawDataSize.isDefined && rawDataSize.get > 0) { - Some(CatalogStatistics(sizeInBytes = rawDataSize.get, rowCount = rowCount)) - } else { - // TODO: still fill the rowCount even if sizeInBytes is empty. Might break anything? - None - } - CatalogTable( identifier = TableIdentifier(h.getTableName, Option(h.getDbName)), tableType = h.getTableType match { @@ -476,7 +452,7 @@ private[hive] class HiveClientImpl( // For EXTERNAL_TABLE, the table properties has a particular field "EXTERNAL". This is added // in the function toHiveTable. 
properties = filteredProperties, - stats = stats, + stats = readHiveStats(properties), comment = comment, // In older versions of Spark(before 2.2.0), we expand the view original text and store // that into `viewExpandedText`, and that should be used in view resolution. So we get @@ -488,6 +464,7 @@ private[hive] class HiveClientImpl( } override def createTable(table: CatalogTable, ignoreIfExists: Boolean): Unit = withHiveState { + verifyColumnDataType(table.dataSchema) client.createTable(toHiveTable(table, Some(userName)), ignoreIfExists) } @@ -507,6 +484,7 @@ private[hive] class HiveClientImpl( // these properties are still available to the others that share the same Hive metastore. // If users explicitly alter these Hive-specific properties through ALTER TABLE DDL, we respect // these user-specified values. + verifyColumnDataType(table.dataSchema) val hiveTable = toHiveTable( table.copy(properties = table.ignoredProperties ++ table.properties), Some(userName)) // Do not use `table.qualifiedName` here because this may be a rename @@ -520,6 +498,7 @@ private[hive] class HiveClientImpl( newDataSchema: StructType, schemaProps: Map[String, String]): Unit = withHiveState { val oldTable = client.getTable(dbName, tableName) + verifyColumnDataType(newDataSchema) val hiveCols = newDataSchema.map(toHiveColumn) oldTable.setFields(hiveCols.asJava) @@ -844,7 +823,8 @@ private[hive] class HiveClientImpl( } def reset(): Unit = withHiveState { - client.getAllTables("default").asScala.foreach { t => + try { + client.getAllTables("default").asScala.foreach { t => logDebug(s"Deleting table $t") val table = client.getTable("default", t) client.getIndexes("default", t, 255).asScala.foreach { index => @@ -858,6 +838,9 @@ private[hive] class HiveClientImpl( logDebug(s"Dropping Database: $db") client.dropDatabase(db, true, false, true) } + } finally { + runSqlHive("USE default") + } } } @@ -872,15 +855,19 @@ private[hive] object HiveClientImpl { new FieldSchema(c.name, typeString, c.getComment().orNull) } - /** Builds the native StructField from Hive's FieldSchema. */ - def fromHiveColumn(hc: FieldSchema): StructField = { - val columnType = try { + /** Get the Spark SQL native DataType from Hive's FieldSchema. */ + private def getSparkSQLDataType(hc: FieldSchema): DataType = { + try { CatalystSqlParser.parseDataType(hc.getType) } catch { case e: ParseException => throw new SparkException("Cannot recognize hive type string: " + hc.getType, e) } + } + /** Builds the native StructField from Hive's FieldSchema. 
*/ + def fromHiveColumn(hc: FieldSchema): StructField = { + val columnType = getSparkSQLDataType(hc) val metadata = if (hc.getType != columnType.catalogString) { new MetadataBuilder().putString(HIVE_TYPE_STRING, hc.getType).build() } else { @@ -895,6 +882,10 @@ private[hive] object HiveClientImpl { Option(hc.getComment).map(field.withComment).getOrElse(field) } + private def verifyColumnDataType(schema: StructType): Unit = { + schema.foreach(col => getSparkSQLDataType(toHiveColumn(col))) + } + private def toInputFormat(name: String) = Utils.classForName(name).asInstanceOf[Class[_ <: org.apache.hadoop.mapred.InputFormat[_, _]]] @@ -998,6 +989,11 @@ private[hive] object HiveClientImpl { */ def fromHivePartition(hp: HivePartition): CatalogTablePartition = { val apiPartition = hp.getTPartition + val properties: Map[String, String] = if (hp.getParameters != null) { + hp.getParameters.asScala.toMap + } else { + Map.empty + } CatalogTablePartition( spec = Option(hp.getSpec).map(_.asScala.toMap).getOrElse(Map.empty), storage = CatalogStorageFormat( @@ -1008,8 +1004,37 @@ private[hive] object HiveClientImpl { compressed = apiPartition.getSd.isCompressed, properties = Option(apiPartition.getSd.getSerdeInfo.getParameters) .map(_.asScala.toMap).orNull), - parameters = - if (hp.getParameters() != null) hp.getParameters().asScala.toMap else Map.empty) + parameters = properties, + stats = readHiveStats(properties)) + } + + /** + * Reads statistics from Hive. + * Note that these statistics could be overridden by Spark's statistics if those are available. + */ + private def readHiveStats(properties: Map[String, String]): Option[CatalogStatistics] = { + val totalSize = properties.get(StatsSetupConst.TOTAL_SIZE).map(BigInt(_)) + val rawDataSize = properties.get(StatsSetupConst.RAW_DATA_SIZE).map(BigInt(_)) + val rowCount = properties.get(StatsSetupConst.ROW_COUNT).map(BigInt(_)) + // NOTE: getting `totalSize` directly from params is kind of hacky, but this should be + // relatively cheap if parameters for the table are populated into the metastore. + // Currently, only totalSize, rawDataSize, and rowCount are used to build the field `stats` + // TODO: stats should also include the other two fields (`numFiles` and `numPartitions`). + // (see StatsSetupConst in Hive) + + // When the table is external, `totalSize` is always zero, which will influence join strategy. + // So when `totalSize` is zero, use `rawDataSize` instead. When `rawDataSize` is also zero, + // return None. + // In Hive, when statistics gathering is disabled, `rawDataSize` and `numRows` are always + // zero after an INSERT command. So they are used here only if they are larger than zero. + if (totalSize.isDefined && totalSize.get > 0L) { + Some(CatalogStatistics(sizeInBytes = totalSize.get, rowCount = rowCount.filter(_ > 0))) + } else if (rawDataSize.isDefined && rawDataSize.get > 0) { + Some(CatalogStatistics(sizeInBytes = rawDataSize.get, rowCount = rowCount.filter(_ > 0))) + } else { + // TODO: still fill the rowCount even if sizeInBytes is empty. Might break anything? 
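The selection rule that the new readHiveStats helper implements, reduced to plain values (a simplified sketch; the real method builds CatalogStatistics): prefer totalSize, fall back to rawDataSize, otherwise report no stats, and treat a zero rowCount as unknown because Hive writes 0 when statistics gathering is disabled:

def pickStats(
    totalSize: Option[BigInt],
    rawDataSize: Option[BigInt],
    rowCount: Option[BigInt]): Option[(BigInt, Option[BigInt])] = {
  val rows = rowCount.filter(_ > 0)              // 0 recorded rows means "unknown"
  if (totalSize.exists(_ > 0)) Some((totalSize.get, rows))
  else if (rawDataSize.exists(_ > 0)) Some((rawDataSize.get, rows))
  else None
}

// Mirrors the "zero rowCount" test added later in this patch: size kept, rows dropped.
assert(pickStats(Some(BigInt("8000000000000")), Some(BigInt("60000000000")), Some(BigInt(0)))
  == Some((BigInt("8000000000000"), None)))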
+ None + } } // Below is the key of table properties for storing Hive-generated statistics diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 212e24f082017..49954e82b073d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -249,7 +249,7 @@ private[hive] class IsolatedClientLoader( } /** The isolated client interface to Hive. */ - private[hive] def createClient(): HiveClient = { + private[hive] def createClient(): HiveClient = synchronized { if (!isolationOn) { return new HiveClientImpl(version, sparkConf, hadoopConf, config, baseClassLoader, this) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveDirCommand.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveDirCommand.scala index 1c6f8dd77fc2c..cebeca0ce9444 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveDirCommand.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveDirCommand.scala @@ -27,10 +27,12 @@ import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe import org.apache.hadoop.mapred._ import org.apache.spark.SparkException -import org.apache.spark.sql.{Dataset, Row, SparkSession} +import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable} +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.hive.client.HiveClientImpl /** @@ -54,9 +56,10 @@ case class InsertIntoHiveDirCommand( isLocal: Boolean, storage: CatalogStorageFormat, query: LogicalPlan, - overwrite: Boolean) extends SaveAsHiveFile { + overwrite: Boolean, + outputColumns: Seq[Attribute]) extends SaveAsHiveFile { - override def run(sparkSession: SparkSession): Seq[Row] = { + override def run(sparkSession: SparkSession, child: SparkPlan): Seq[Row] = { assert(storage.locationUri.nonEmpty) val hiveTable = HiveClientImpl.toHiveTable(CatalogTable( @@ -98,10 +101,11 @@ case class InsertIntoHiveDirCommand( try { saveAsHiveFile( sparkSession = sparkSession, - queryExecution = Dataset.ofRows(sparkSession, query).queryExecution, + plan = child, hadoopConf = hadoopConf, fileSinkConf = fileSinkConf, - outputLocation = tmpPath.toString) + outputLocation = tmpPath.toString, + allColumns = outputColumns) val fs = writeToPath.getFileSystem(hadoopConf) if (overwrite && fs.exists(writeToPath)) { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index 56e10bc457a00..3ce5b8469d6fc 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -17,15 +17,17 @@ package org.apache.spark.sql.hive.execution +import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.hive.ql.ErrorMsg import org.apache.hadoop.hive.ql.plan.TableDesc import org.apache.spark.SparkException -import org.apache.spark.sql.{AnalysisException, 
Dataset, Row, SparkSession} -import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.{AnalysisException, Row, SparkSession} +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, ExternalCatalog} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.command.CommandUtils import org.apache.spark.sql.hive.HiveShim.{ShimFileSinkDesc => FileSinkDesc} import org.apache.spark.sql.hive.client.HiveClientImpl @@ -66,14 +68,15 @@ case class InsertIntoHiveTable( partition: Map[String, Option[String]], query: LogicalPlan, overwrite: Boolean, - ifPartitionNotExists: Boolean) extends SaveAsHiveFile { + ifPartitionNotExists: Boolean, + outputColumns: Seq[Attribute]) extends SaveAsHiveFile { /** * Inserts all the rows in the table into Hive. Row objects are properly serialized with the * `org.apache.hadoop.hive.serde2.SerDe` and the * `org.apache.hadoop.mapred.OutputFormat` provided by the table definition. */ - override def run(sparkSession: SparkSession): Seq[Row] = { + override def run(sparkSession: SparkSession, child: SparkPlan): Seq[Row] = { val externalCatalog = sparkSession.sharedState.externalCatalog val hadoopConf = sparkSession.sessionState.newHadoopConf() @@ -91,6 +94,35 @@ case class InsertIntoHiveTable( ) val tableLocation = hiveQlTable.getDataLocation val tmpLocation = getExternalTmpPath(sparkSession, hadoopConf, tableLocation) + + try { + processInsert(sparkSession, externalCatalog, hadoopConf, tableDesc, tmpLocation, child) + } finally { + // Attempt to delete the staging directory and the inclusive files. If failed, the files are + // expected to be dropped at the normal termination of VM since deleteOnExit is used. + deleteExternalTmpPath(hadoopConf) + } + + // un-cache this table. + sparkSession.catalog.uncacheTable(table.identifier.quotedString) + sparkSession.sessionState.catalog.refreshTable(table.identifier) + + CommandUtils.updateTableStats(sparkSession, table) + + // It would be nice to just return the childRdd unchanged so insert operations could be chained, + // however for now we return an empty list to simplify compatibility checks with hive, which + // does not return anything for insert operations. + // TODO: implement hive compatibility as rules. + Seq.empty[Row] + } + + private def processInsert( + sparkSession: SparkSession, + externalCatalog: ExternalCatalog, + hadoopConf: Configuration, + tableDesc: TableDesc, + tmpLocation: Path, + child: SparkPlan): Unit = { val fileSinkConf = new FileSinkDesc(tmpLocation.toString, tableDesc, false) val numDynamicPartitions = partition.values.count(_.isEmpty) @@ -162,10 +194,11 @@ case class InsertIntoHiveTable( saveAsHiveFile( sparkSession = sparkSession, - queryExecution = Dataset.ofRows(sparkSession, query).queryExecution, + plan = child, hadoopConf = hadoopConf, fileSinkConf = fileSinkConf, outputLocation = tmpLocation.toString, + allColumns = outputColumns, partitionAttributes = partitionAttributes) if (partition.nonEmpty) { @@ -231,21 +264,5 @@ case class InsertIntoHiveTable( overwrite, isSrcLocal = false) } - - // Attempt to delete the staging directory and the inclusive files. If failed, the files are - // expected to be dropped at the normal termination of VM since deleteOnExit is used. - deleteExternalTmpPath(hadoopConf) - - // un-cache this table. 
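The motivation for restructuring run() here is error handling: previously deleteExternalTmpPath ran only on the success path, while the try/finally introduced in this hunk guarantees the staging directory is removed even when the write fails. The shape in miniature, with hypothetical stand-ins for the staging helpers:

import java.nio.file.{Files, Path}

def insertLike(write: Path => Unit): Unit = {
  val tmp = Files.createTempDirectory("hive-staging")  // cf. getExternalTmpPath
  try {
    write(tmp)
  } finally {
    // cf. deleteExternalTmpPath: now runs on failure too (single-level delete for brevity)
    Files.deleteIfExists(tmp)
  }
}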
- sparkSession.catalog.uncacheTable(table.identifier.quotedString) - sparkSession.sessionState.catalog.refreshTable(table.identifier) - - CommandUtils.updateTableStats(sparkSession, table) - - // It would be nice to just return the childRdd unchanged so insert operations could be chained, - // however for now we return an empty list to simplify compatibility checks with hive, which - // does not return anything for insert operations. - // TODO: implement hive compatibility as rules. - Seq.empty[Row] } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala index 63657590e5e79..9a6607f2f2c6c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala @@ -33,7 +33,7 @@ import org.apache.spark.internal.io.FileCommitProtocol import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.execution.QueryExecution +import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.command.DataWritingCommand import org.apache.spark.sql.execution.datasources.FileFormatWriter import org.apache.spark.sql.hive.HiveExternalCatalog @@ -47,10 +47,11 @@ private[hive] trait SaveAsHiveFile extends DataWritingCommand { protected def saveAsHiveFile( sparkSession: SparkSession, - queryExecution: QueryExecution, + plan: SparkPlan, hadoopConf: Configuration, fileSinkConf: FileSinkDesc, outputLocation: String, + allColumns: Seq[Attribute], customPartitionLocations: Map[TablePartitionSpec, String] = Map.empty, partitionAttributes: Seq[Attribute] = Nil): Set[String] = { @@ -75,10 +76,11 @@ private[hive] trait SaveAsHiveFile extends DataWritingCommand { FileFormatWriter.write( sparkSession = sparkSession, - queryExecution = queryExecution, + plan = plan, fileFormat = new HiveFileFormat(fileSinkConf), committer = committer, - outputSpec = FileFormatWriter.OutputSpec(outputLocation, customPartitionLocations), + outputSpec = + FileFormatWriter.OutputSpec(outputLocation, customPartitionLocations, allColumns), hadoopConf = hadoopConf, partitionColumns = partitionAttributes, bucketSpec = None, diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformationExec.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformationExec.scala index d786a610f1535..3328400b214fb 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformationExec.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformationExec.scala @@ -412,7 +412,9 @@ case class HiveScriptIOSchema ( propsMap = propsMap + (serdeConstants.LIST_COLUMN_TYPES -> columnTypesNames) val properties = new Properties() - properties.putAll(propsMap.asJava) + // Can not use properties.putAll(propsMap.asJava) in scala-2.12 + // See https://github.com/scala/bug/issues/10418 + propsMap.foreach { case (k, v) => properties.put(k, v) } serde.initialize(null, properties) serde @@ -424,7 +426,9 @@ case class HiveScriptIOSchema ( recordReaderClass.map { klass => val instance = Utils.classForName(klass).newInstance().asInstanceOf[RecordReader] val props = new Properties() - props.putAll(outputSerdeProps.toMap.asJava) + // Can not use 
props.putAll(outputSerdeProps.toMap.asJava) in scala-2.12 + // See https://github.com/scala/bug/issues/10418 + outputSerdeProps.toMap.foreach { case (k, v) => props.put(k, v) } instance.initialize(inputStream, conf, props) instance } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala index 3b33a9ff082f3..237ed9bc05988 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala @@ -59,9 +59,11 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable sparkSession: SparkSession, options: Map[String, String], files: Seq[FileStatus]): Option[StructType] = { + val ignoreCorruptFiles = sparkSession.sessionState.conf.ignoreCorruptFiles OrcFileOperator.readSchema( files.map(_.getPath.toString), - Some(sparkSession.sessionState.newHadoopConf()) + Some(sparkSession.sessionState.newHadoopConf()), + ignoreCorruptFiles ) } @@ -129,14 +131,18 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable val broadcastedHadoopConf = sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf)) + val ignoreCorruptFiles = sparkSession.sessionState.conf.ignoreCorruptFiles (file: PartitionedFile) => { val conf = broadcastedHadoopConf.value.value + val filePath = new Path(new URI(file.filePath)) + // SPARK-8501: Empty ORC files always have an empty schema stored in their footer. In this // case, `OrcFileOperator.readSchema` returns `None`, and we can't read the underlying file // using the given physical schema. Instead, we simply return an empty iterator. - val isEmptyFile = OrcFileOperator.readSchema(Seq(file.filePath), Some(conf)).isEmpty + val isEmptyFile = + OrcFileOperator.readSchema(Seq(filePath.toString), Some(conf), ignoreCorruptFiles).isEmpty if (isEmptyFile) { Iterator.empty } else { @@ -146,15 +152,12 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable val job = Job.getInstance(conf) FileInputFormat.setInputPaths(job, file.filePath) - val fileSplit = new FileSplit( - new Path(new URI(file.filePath)), file.start, file.length, Array.empty - ) + val fileSplit = new FileSplit(filePath, file.start, file.length, Array.empty) // Custom OrcRecordReader is used to get // ObjectInspector during recordReader creation itself and can // avoid NameNode call in unwrapOrcStructs per file. // Specifically would be helpful for partitioned datasets. 
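A self-contained illustration of the Scala 2.12 pitfall the two comments above reference (scala/bug#10418; as the ticket describes it, putAll on java.util.Properties stops compiling under 2.12 once the JDK itself overrides putAll), together with the entry-by-entry copy that sidesteps it; the property names are illustrative:

import java.util.Properties
import scala.collection.JavaConverters._

val propsMap = Map("serialization.format" -> "1", "field.delim" -> ",")

val properties = new Properties()
// properties.putAll(propsMap.asJava)             // breaks under Scala 2.12
propsMap.foreach { case (k, v) => properties.put(k, v) }

assert(properties.getProperty("field.delim") == ",")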
- val orcReader = OrcFile.createReader( - new Path(new URI(file.filePath)), OrcFile.readerOptions(conf)) + val orcReader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf)) new SparkOrcNewRecordReader(orcReader, conf, fileSplit.getStart, fileSplit.getLength) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala index 5a3fcd7a759c0..80e44ca504356 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala @@ -17,11 +17,14 @@ package org.apache.spark.sql.hive.orc +import java.io.IOException + import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.hive.ql.io.orc.{OrcFile, Reader} import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector +import org.apache.spark.SparkException import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.parser.CatalystSqlParser @@ -46,7 +49,10 @@ private[hive] object OrcFileOperator extends Logging { * create the result reader from that file. If no such file is found, it returns `None`. * @todo Needs to consider all files when schema evolution is taken into account. */ - def getFileReader(basePath: String, config: Option[Configuration] = None): Option[Reader] = { + def getFileReader(basePath: String, + config: Option[Configuration] = None, + ignoreCorruptFiles: Boolean = false) + : Option[Reader] = { def isWithNonEmptySchema(path: Path, reader: Reader): Boolean = { reader.getObjectInspector match { case oi: StructObjectInspector if oi.getAllStructFieldRefs.size() == 0 => @@ -65,16 +71,28 @@ private[hive] object OrcFileOperator extends Logging { } listOrcFiles(basePath, conf).iterator.map { path => - path -> OrcFile.createReader(fs, path) + val reader = try { + Some(OrcFile.createReader(fs, path)) + } catch { + case e: IOException => + if (ignoreCorruptFiles) { + logWarning(s"Skipped the footer in the corrupted file: $path", e) + None + } else { + throw new SparkException(s"Could not read footer for file: $path", e) + } + } + path -> reader }.collectFirst { - case (path, reader) if isWithNonEmptySchema(path, reader) => reader + case (path, Some(reader)) if isWithNonEmptySchema(path, reader) => reader } } - def readSchema(paths: Seq[String], conf: Option[Configuration]): Option[StructType] = { + def readSchema(paths: Seq[String], conf: Option[Configuration], ignoreCorruptFiles: Boolean) + : Option[StructType] = { // Take the first file where we can open a valid reader if we can find one. Otherwise just // return None to indicate we can't infer the schema. 
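A reduced model of the skip-or-fail choice that getFileReader/readSchema now make per file in the hunk below; readFooter stands in for OrcFile.createReader, and the eager flatMap is a simplification of the lazy iterator in the real code:

import java.io.IOException

def readFooter(path: String): String =
  if (path.contains("corrupt")) throw new IOException(s"malformed ORC: $path")
  else s"schema-of-$path"

def firstSchema(paths: Seq[String], ignoreCorruptFiles: Boolean): Option[String] =
  paths.flatMap { p =>
    try Some(readFooter(p))
    catch {
      // Without the flag the IOException propagates (Spark wraps it in SparkException).
      case e: IOException if ignoreCorruptFiles =>
        println(s"Skipped the footer in the corrupted file: $p")  // logWarning in Spark
        None
    }
  }.headOption

assert(firstSchema(Seq("corrupt.orc", "good.orc"), ignoreCorruptFiles = true)
  .contains("schema-of-good.orc"))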
- paths.flatMap(getFileReader(_, conf)).headOption.map { reader => + paths.flatMap(getFileReader(_, conf, ignoreCorruptFiles)).headOption.map { reader => val readerInspector = reader.getObjectInspector.asInstanceOf[StructObjectInspector] val schema = readerInspector.getTypeName logDebug(s"Reading schema from file $paths, got Hive schema string: $schema") diff --git a/sql/hive/src/test/resources/data/conf/hive-log4j.properties b/sql/hive/src/test/resources/data/conf/hive-log4j.properties index 6a042472adb90..83fd03a99bc37 100644 --- a/sql/hive/src/test/resources/data/conf/hive-log4j.properties +++ b/sql/hive/src/test/resources/data/conf/hive-log4j.properties @@ -32,7 +32,7 @@ log4j.threshhold=WARN log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender log4j.appender.DRFA.File=${hive.log.dir}/${hive.log.file} -# Rollver at midnight +# Roll over at midnight log4j.appender.DRFA.DatePattern=.yyyy-MM-dd # 30-day backup diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala index 2e35fdeba464d..0a522b6a11c80 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala @@ -107,4 +107,10 @@ class HiveExternalCatalogSuite extends ExternalCatalogSuite { .filter(_.contains("Num Buckets")).head assert(bucketString.contains("10")) } + + test("SPARK-23001: NullPointerException when running desc database") { + val catalog = newBasicCatalog() + catalog.createDatabase(newDb("dbWithNullDesc").copy(description = null), ignoreIfExists = false) + assert(catalog.getDatabase("dbWithNullDesc").description == "") + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 6859432c406a9..ae4aeb7b4ce4a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -18,9 +18,14 @@ package org.apache.spark.sql.hive import java.io.File -import java.nio.file.Files +import java.nio.charset.StandardCharsets +import java.nio.file.{Files, Paths} -import org.apache.spark.TestUtils +import scala.sys.process._ + +import org.apache.hadoop.conf.Configuration + +import org.apache.spark.{SecurityManager, SparkConf, TestUtils} import org.apache.spark.sql.{QueryTest, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTableType @@ -50,14 +55,29 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { super.afterAll() } - private def downloadSpark(version: String): Unit = { - import scala.sys.process._ + private def tryDownloadSpark(version: String, path: String): Unit = { + // Try mirrors a few times until one succeeds + for (i <- 0 until 3) { + // we don't retry on a failure to get mirror url. 
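The retry shape used by tryDownloadSpark, generalized into a sketch (resolveMirror and fetch are hypothetical stand-ins; note that a failure to resolve the mirror is deliberately not retried, matching the comment in the patch):

def downloadWithRetries(attempts: Int)(resolveMirror: () => String)(fetch: String => Unit): Unit = {
  for (i <- 1 to attempts) {
    val url = resolveMirror()  // an exception here fails the test immediately
    try {
      fetch(url)
      return
    } catch {
      case e: Exception => println(s"attempt $i failed for $url: ${e.getMessage}")
    }
  }
  sys.error(s"unable to download after $attempts attempts")  // cf. fail(...) in the suite
}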
If we can't get a mirror url, + // the test fails (getStringFromUrl will throw an exception) + val preferredMirror = + getStringFromUrl("https://www.apache.org/dyn/closer.lua?preferred=true") + val filename = s"spark-$version-bin-hadoop2.7.tgz" + val url = s"$preferredMirror/spark/spark-$version/$filename" + logInfo(s"Downloading Spark $version from $url") + try { + getFileFromUrl(url, path, filename) + return + } catch { + case ex: Exception => logWarning(s"Failed to download Spark $version from $url", ex) + } + } + fail(s"Unable to download Spark $version") + } - val preferredMirror = - Seq("wget", "https://www.apache.org/dyn/closer.lua?preferred=true", "-q", "-O", "-").!!.trim - val url = s"$preferredMirror/spark/spark-$version/spark-$version-bin-hadoop2.7.tgz" - Seq("wget", url, "-q", "-P", sparkTestingDir.getCanonicalPath).! + private def downloadSpark(version: String): Unit = { + tryDownloadSpark(version, sparkTestingDir.getCanonicalPath) val downloaded = new File(sparkTestingDir, s"spark-$version-bin-hadoop2.7.tgz").getCanonicalPath val targetDir = new File(sparkTestingDir, s"spark-$version").getCanonicalPath @@ -73,6 +93,34 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { new File(tmpDataDir, name).getCanonicalPath } + private def getFileFromUrl(urlString: String, targetDir: String, filename: String): Unit = { + val conf = new SparkConf + // if the caller passes the name of an existing file, we want doFetchFile to write over it with + // the contents from the specified url. + conf.set("spark.files.overwrite", "true") + val securityManager = new SecurityManager(conf) + val hadoopConf = new Configuration + + val outDir = new File(targetDir) + if (!outDir.exists()) { + outDir.mkdirs() + } + + // propagate exceptions up to the caller of getFileFromUrl + Utils.doFetchFile(urlString, outDir, filename, conf, securityManager, hadoopConf) + } + + private def getStringFromUrl(urlString: String): String = { + val contentFile = File.createTempFile("string-", ".txt") + contentFile.deleteOnExit() + + // exceptions will propagate to the caller of getStringFromUrl + getFileFromUrl(urlString, contentFile.getParent, contentFile.getName) + + val contentPath = Paths.get(contentFile.toURI) + new String(Files.readAllBytes(contentPath), StandardCharsets.UTF_8) + } + override def beforeAll(): Unit = { super.beforeAll() diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala index 18137e7ea1d63..ba9b944e4a055 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala @@ -146,6 +146,12 @@ class DataSourceWithHiveMetastoreCatalogSuite 'id cast StringType as 'd2 ).coalesce(1) + override def beforeAll(): Unit = { + super.beforeAll() + sparkSession.sessionState.catalog.reset() + sparkSession.metadataHive.reset() + } + Seq( "parquet" -> (( "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala index fdbfcf1a68440..8697d47e89e89 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala @@ -17,11 +17,16 @@ package org.apache.spark.sql.hive +import java.net.URL + import 
org.apache.hadoop.hive.conf.HiveConf.ConfVars +import org.apache.spark.SparkConf +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.sql.QueryTest import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader} class HiveUtilsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { @@ -42,4 +47,19 @@ class HiveUtilsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton assert(hiveConf("foo") === "bar") } } + + test("ChildFirstURLClassLoader's parent is null, get spark classloader instead") { + val conf = new SparkConf + val contextClassLoader = Thread.currentThread().getContextClassLoader + val loader = new ChildFirstURLClassLoader(Array(), contextClassLoader) + try { + Thread.currentThread().setContextClassLoader(loader) + HiveUtils.newClientForMetadata( + conf, + SparkHadoopUtil.newConfiguration(conf), + HiveUtils.newTemporaryConfiguration(useInMemoryDerby = true)) + } finally { + Thread.currentThread().setContextClassLoader(contextClassLoader) + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index a1060476f2211..c8caba83bf365 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -1350,7 +1350,8 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv withTempDir { dir => val tmpFile = s"$dir/$nameWithSpecialChars" spark.createDataset(Seq("a", "b")).write.format(format).save(tmpFile) - spark.read.format(format).load(tmpFile) + val fileContent = spark.read.format(format).load(tmpFile) + checkAnswer(fileContent, Seq(Row("a"), Row("b"))) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 0cdd9305c6b6f..3af8af0814bb4 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -213,6 +213,27 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto } } + test("SPARK-22745 - read Hive's statistics for partition") { + val tableName = "hive_stats_part_table" + withTable(tableName) { + sql(s"CREATE TABLE $tableName (key STRING, value STRING) PARTITIONED BY (ds STRING)") + sql(s"INSERT INTO TABLE $tableName PARTITION (ds='2017-01-01') SELECT * FROM src") + var partition = spark.sessionState.catalog + .getPartition(TableIdentifier(tableName), Map("ds" -> "2017-01-01")) + + assert(partition.stats.get.sizeInBytes == 5812) + assert(partition.stats.get.rowCount.isEmpty) + + hiveClient + .runSqlHive(s"ANALYZE TABLE $tableName PARTITION (ds='2017-01-01') COMPUTE STATISTICS") + partition = spark.sessionState.catalog + .getPartition(TableIdentifier(tableName), Map("ds" -> "2017-01-01")) + + assert(partition.stats.get.sizeInBytes == 5812) + assert(partition.stats.get.rowCount == Some(500)) + } + } + test("SPARK-21079 - analyze table with location different than that of individual partitions") { val tableName = "analyzeTable_part" withTable(tableName) { @@ -353,15 +374,6 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto createPartition("2010-01-02", 11, "SELECT '1', 'A' from src 
UNION ALL SELECT '1', 'A' from src") - sql(s"ANALYZE TABLE $tableName PARTITION (ds='2010-01-01') COMPUTE STATISTICS NOSCAN") - - assertPartitionStats("2010-01-01", "10", rowCount = None, sizeInBytes = 2000) - assertPartitionStats("2010-01-01", "11", rowCount = None, sizeInBytes = 2000) - assert(queryStats("2010-01-02", "10") === None) - assert(queryStats("2010-01-02", "11") === None) - - sql(s"ANALYZE TABLE $tableName PARTITION (ds='2010-01-02') COMPUTE STATISTICS NOSCAN") - assertPartitionStats("2010-01-01", "10", rowCount = None, sizeInBytes = 2000) assertPartitionStats("2010-01-01", "11", rowCount = None, sizeInBytes = 2000) assertPartitionStats("2010-01-02", "10", rowCount = None, sizeInBytes = 2000) @@ -631,7 +643,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto """.stripMargin) sql(s"INSERT INTO TABLE $tabName SELECT * FROM src") if (analyzedBySpark) sql(s"ANALYZE TABLE $tabName COMPUTE STATISTICS") - // This is to mimic the scenario in which Hive genrates statistics before we reading it + // This is to mimic the scenario in which Hive generates statistics before we read it if (analyzedByHive) hiveClient.runSqlHive(s"ANALYZE TABLE $tabName COMPUTE STATISTICS") val describeResult1 = hiveClient.runSqlHive(s"DESCRIBE FORMATTED $tabName") @@ -1360,4 +1372,22 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto } } + + test("Deals with wrong Hive's statistics (zero rowCount)") { + withTable("maybe_big") { + sql("CREATE TABLE maybe_big (c1 bigint)" + + "TBLPROPERTIES ('numRows'='0', 'rawDataSize'='60000000000', 'totalSize'='8000000000000')") + + val catalogTable = getCatalogTable("maybe_big") + + val properties = catalogTable.ignoredProperties + assert(properties("totalSize").toLong > 0) + assert(properties("rawDataSize").toLong > 0) + assert(properties("numRows").toLong == 0) + + val catalogStats = catalogTable.stats.get + assert(catalogStats.sizeInBytes > 0) + assert(catalogStats.rowCount.isEmpty) + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala index 9d15dabc8d3f5..e64389e56b5a1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala @@ -163,6 +163,15 @@ class VersionsSuite extends SparkFunSuite with Logging { client.createDatabase(tempDB, ignoreIfExists = true) } + test(s"$version: createDatabase with null description") { + withTempDir { tmpDir => + val dbWithNullDesc = + CatalogDatabase("dbWithNullDesc", description = null, tmpDir.toURI, Map()) + client.createDatabase(dbWithNullDesc, ignoreIfExists = true) + assert(client.getDatabase("dbWithNullDesc").description == "") + } + } + test(s"$version: setCurrentDatabase") { client.setCurrentDatabase("default") } @@ -773,7 +782,7 @@ class VersionsSuite extends SparkFunSuite with Logging { """.stripMargin ) - val errorMsg = "data type mismatch: cannot cast DecimalType(2,1) to BinaryType" + val errorMsg = "data type mismatch: cannot cast decimal(2,1) to binary" if (isPartitioned) { val insertStmt = s"INSERT OVERWRITE TABLE $tableName partition (ds='a') SELECT 1.3" @@ -802,7 +811,7 @@ class VersionsSuite extends SparkFunSuite with Logging { test(s"$version: read avro file containing decimal") { val url = Thread.currentThread().getContextClassLoader.getResource("avroDecimal") - val location = new File(url.getFile) + val location = new 
File(url.getFile).toURI.toString val tableName = "tab1" val avroSchema = @@ -842,6 +851,8 @@ class VersionsSuite extends SparkFunSuite with Logging { } test(s"$version: SPARK-17920: Insert into/overwrite avro table") { + // skipped because it's failed in the condition on Windows + assume(!(Utils.isWindows && version == "0.12")) withTempDir { dir => val avroSchema = """ @@ -866,10 +877,10 @@ class VersionsSuite extends SparkFunSuite with Logging { val writer = new PrintWriter(schemaFile) writer.write(avroSchema) writer.close() - val schemaPath = schemaFile.getCanonicalPath + val schemaPath = schemaFile.toURI.toString val url = Thread.currentThread().getContextClassLoader.getResource("avroDecimal") - val srcLocation = new File(url.getFile).getCanonicalPath + val srcLocation = new File(url.getFile).toURI.toString val destTableName = "tab1" val srcTableName = "tab2" diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index d3465a641a1a4..65be244418670 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -174,6 +174,88 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA test("alter datasource table add columns - partitioned - orc") { testAddColumnPartitioned("orc") } + + test("SPARK-22431: illegal nested type") { + val queries = Seq( + "CREATE TABLE t AS SELECT STRUCT('a' AS `$a`, 1 AS b) q", + "CREATE TABLE t(q STRUCT<`$a`:INT, col2:STRING>, i1 INT)", + "CREATE VIEW t AS SELECT STRUCT('a' AS `$a`, 1 AS b) q") + + queries.foreach(query => { + val err = intercept[SparkException] { + spark.sql(query) + }.getMessage + assert(err.contains("Cannot recognize hive type string")) + }) + + withView("v") { + spark.sql("CREATE VIEW v AS SELECT STRUCT('a' AS `a`, 1 AS b) q") + checkAnswer(sql("SELECT q.`a`, q.b FROM v"), Row("a", 1) :: Nil) + + val err = intercept[SparkException] { + spark.sql("ALTER VIEW v AS SELECT STRUCT('a' AS `$a`, 1 AS b) q") + }.getMessage + assert(err.contains("Cannot recognize hive type string")) + } + } + + test("SPARK-22431: table with nested type") { + withTable("t", "x") { + spark.sql("CREATE TABLE t(q STRUCT<`$a`:INT, col2:STRING>, i1 INT) USING PARQUET") + checkAnswer(spark.table("t"), Nil) + spark.sql("CREATE TABLE x (q STRUCT, i1 INT)") + checkAnswer(spark.table("x"), Nil) + } + } + + test("SPARK-22431: view with nested type") { + withView("v") { + spark.sql("CREATE VIEW v AS SELECT STRUCT('a' AS `a`, 1 AS b) q") + checkAnswer(spark.table("v"), Row(Row("a", 1)) :: Nil) + + spark.sql("ALTER VIEW v AS SELECT STRUCT('a' AS `b`, 1 AS b) q1") + val df = spark.table("v") + assert("q1".equals(df.schema.fields(0).name)) + checkAnswer(df, Row(Row("a", 1)) :: Nil) + } + } + + test("SPARK-22431: alter table tests with nested types") { + withTable("t1", "t2", "t3") { + spark.sql("CREATE TABLE t1 (q STRUCT, i1 INT)") + spark.sql("ALTER TABLE t1 ADD COLUMNS (newcol1 STRUCT<`col1`:STRING, col2:Int>)") + val newcol = spark.sql("SELECT * FROM t1").schema.fields(2).name + assert("newcol1".equals(newcol)) + + spark.sql("CREATE TABLE t2(q STRUCT<`a`:INT, col2:STRING>, i1 INT) USING PARQUET") + spark.sql("ALTER TABLE t2 ADD COLUMNS (newcol1 STRUCT<`$col1`:STRING, col2:Int>)") + spark.sql("ALTER TABLE t2 ADD COLUMNS (newcol2 STRUCT<`col1`:STRING, col2:Int>)") + + val df2 = spark.table("t2") + checkAnswer(df2, Nil) + 
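In miniature, what the SPARK-22431 tests establish (assuming a ScalaTest suite with a Hive-enabled session such as TestHiveSingleton; table names are illustrative): Hive's type-string parser cannot represent field names like `$a`, so the eager verifyColumnDataType check added in HiveClientImpl fails fast, while data source tables, whose schema is kept in table properties, accept the same names:

import org.apache.spark.SparkException

// Hive serde table: rejected up front instead of writing unreadable metadata.
val err = intercept[SparkException] {
  spark.sql("CREATE TABLE bad_t(q STRUCT<`$a`:INT, col2:STRING>)")
}
assert(err.getMessage.contains("Cannot recognize hive type string"))

// Data source table: the same nested schema round-trips fine.
spark.sql("CREATE TABLE ok_t(q STRUCT<`$a`:INT, col2:STRING>) USING PARQUET")
assert(spark.table("ok_t").schema.head.name == "q")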
assert("newcol1".equals(df2.schema.fields(2).name)) + assert("newcol2".equals(df2.schema.fields(3).name)) + + spark.sql("CREATE TABLE t3(q STRUCT<`$a`:INT, col2:STRING>, i1 INT) USING PARQUET") + spark.sql("ALTER TABLE t3 ADD COLUMNS (newcol1 STRUCT<`$col1`:STRING, col2:Int>)") + spark.sql("ALTER TABLE t3 ADD COLUMNS (newcol2 STRUCT<`col1`:STRING, col2:Int>)") + + val df3 = spark.table("t3") + checkAnswer(df3, Nil) + assert("newcol1".equals(df3.schema.fields(2).name)) + assert("newcol2".equals(df3.schema.fields(3).name)) + } + } + + test("SPARK-22431: negative alter table tests with nested types") { + withTable("t1") { + spark.sql("CREATE TABLE t1 (q STRUCT, i1 INT)") + val err = intercept[SparkException] { + spark.sql("ALTER TABLE t1 ADD COLUMNS (newcol1 STRUCT<`$col1`:STRING, col2:Int>)") + }.getMessage + assert(err.contains("Cannot recognize hive type string:")) + } + } } class HiveDDLSuite @@ -793,12 +875,13 @@ class HiveDDLSuite test("desc table for Hive table - bucketed + sorted table") { withTable("tbl") { - sql(s""" - CREATE TABLE tbl (id int, name string) - PARTITIONED BY (ds string) - CLUSTERED BY(id) - SORTED BY(id, name) INTO 1024 BUCKETS - """) + sql( + s""" + |CREATE TABLE tbl (id int, name string) + |CLUSTERED BY(id) + |SORTED BY(id, name) INTO 1024 BUCKETS + |PARTITIONED BY (ds string) + """.stripMargin) val x = sql("DESC FORMATTED tbl").collect() assert(x.containsSlice( @@ -2059,4 +2142,26 @@ class HiveDDLSuite } } } + + test("load command for non local invalid path validation") { + withTable("tbl") { + sql("CREATE TABLE tbl(i INT, j STRING)") + val e = intercept[AnalysisException]( + sql("load data inpath '/doesnotexist.csv' into table tbl")) + assert(e.message.contains("LOAD DATA input path does not exist")) + } + } + + test("SPARK-22252: FileFormatWriter should respect the input query schema in HIVE") { + withTable("t1", "t2", "t3", "t4") { + spark.range(1).select('id as 'col1, 'id as 'col2).write.saveAsTable("t1") + spark.sql("select COL1, COL2 from t1").write.format("hive").saveAsTable("t2") + checkAnswer(spark.table("t2"), Row(0, 0)) + + // Test picking part of the columns when writing. 
+ spark.range(1).select('id, 'id as 'col1, 'id as 'col2).write.saveAsTable("t3") + spark.sql("select COL1, COL2 from t3").write.format("hive").saveAsTable("t4") + checkAnswer(spark.table("t4"), Row(0, 0)) + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala index dfabf1ec2a22a..a4273de5fe260 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala @@ -171,4 +171,21 @@ class HiveExplainSuite extends QueryTest with SQLTestUtils with TestHiveSingleto sql("EXPLAIN EXTENDED CODEGEN SELECT 1") } } + + test("SPARK-23021 AnalysisBarrier should not cut off explain output for parsed logical plans") { + val df = Seq((1, 1)).toDF("a", "b").groupBy("a").count().limit(1) + val outputStream = new java.io.ByteArrayOutputStream() + Console.withOut(outputStream) { + df.explain(true) + } + assert(outputStream.toString.replaceAll("""#\d+""", "#0").contains( + s"""== Parsed Logical Plan == + |GlobalLimit 1 + |+- LocalLimit 1 + | +- AnalysisBarrier + | +- Aggregate [a#0], [a#0, count(1) AS count#0L] + | +- Project [_1#0 AS a#0, _2#0 AS b#0] + | +- LocalRelation [_1#0, _2#0] + |""".stripMargin)) + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 3457cbd20277e..e7fd04bec0142 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -461,51 +461,55 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } test("CTAS without serde without location") { - val originalConf = sessionState.conf.convertCTAS - - setConf(SQLConf.CONVERT_CTAS, true) - - val defaultDataSource = sessionState.conf.defaultDataSourceName - try { - sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value") - sql("CREATE TABLE IF NOT EXISTS ctas1 AS SELECT key k, value FROM src ORDER BY k, value") - val message = intercept[AnalysisException] { + withSQLConf(SQLConf.CONVERT_CTAS.key -> "true") { + val defaultDataSource = sessionState.conf.defaultDataSourceName + withTable("ctas1") { sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value") - }.getMessage - assert(message.contains("already exists")) - checkRelation("ctas1", true, defaultDataSource) - sql("DROP TABLE ctas1") + sql("CREATE TABLE IF NOT EXISTS ctas1 AS SELECT key k, value FROM src ORDER BY k, value") + val message = intercept[AnalysisException] { + sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value") + }.getMessage + assert(message.contains("already exists")) + checkRelation("ctas1", isDataSourceTable = true, defaultDataSource) + } // Specifying database name for query can be converted to data source write path // is not allowed right now. 
- sql("CREATE TABLE default.ctas1 AS SELECT key k, value FROM src ORDER BY k, value") - checkRelation("ctas1", true, defaultDataSource) - sql("DROP TABLE ctas1") + withTable("ctas1") { + sql("CREATE TABLE default.ctas1 AS SELECT key k, value FROM src ORDER BY k, value") + checkRelation("ctas1", isDataSourceTable = true, defaultDataSource) + } - sql("CREATE TABLE ctas1 stored as textfile" + + withTable("ctas1") { + sql("CREATE TABLE ctas1 stored as textfile" + " AS SELECT key k, value FROM src ORDER BY k, value") - checkRelation("ctas1", false, "text") - sql("DROP TABLE ctas1") + checkRelation("ctas1", isDataSourceTable = false, "text") + } - sql("CREATE TABLE ctas1 stored as sequencefile" + - " AS SELECT key k, value FROM src ORDER BY k, value") - checkRelation("ctas1", false, "sequence") - sql("DROP TABLE ctas1") + withTable("ctas1") { + sql("CREATE TABLE ctas1 stored as sequencefile" + + " AS SELECT key k, value FROM src ORDER BY k, value") + checkRelation("ctas1", isDataSourceTable = false, "sequence") + } - sql("CREATE TABLE ctas1 stored as rcfile AS SELECT key k, value FROM src ORDER BY k, value") - checkRelation("ctas1", false, "rcfile") - sql("DROP TABLE ctas1") + withTable("ctas1") { + sql("CREATE TABLE ctas1 stored as rcfile AS SELECT key k, value FROM src ORDER BY k, value") + checkRelation("ctas1", isDataSourceTable = false, "rcfile") + } - sql("CREATE TABLE ctas1 stored as orc AS SELECT key k, value FROM src ORDER BY k, value") - checkRelation("ctas1", false, "orc") - sql("DROP TABLE ctas1") + withTable("ctas1") { + sql("CREATE TABLE ctas1 stored as orc AS SELECT key k, value FROM src ORDER BY k, value") + checkRelation("ctas1", isDataSourceTable = false, "orc") + } - sql("CREATE TABLE ctas1 stored as parquet AS SELECT key k, value FROM src ORDER BY k, value") - checkRelation("ctas1", false, "parquet") - sql("DROP TABLE ctas1") - } finally { - setConf(SQLConf.CONVERT_CTAS, originalConf) - sql("DROP TABLE IF EXISTS ctas1") + withTable("ctas1") { + sql( + """ + |CREATE TABLE ctas1 stored as parquet + |AS SELECT key k, value FROM src ORDER BY k, value + """.stripMargin) + checkRelation("ctas1", isDataSourceTable = false, "parquet") + } } } @@ -539,30 +543,40 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { val defaultDataSource = sessionState.conf.defaultDataSourceName val tempLocation = dir.toURI.getPath.stripSuffix("/") - sql(s"CREATE TABLE ctas1 LOCATION 'file:$tempLocation/c1'" + - " AS SELECT key k, value FROM src ORDER BY k, value") - checkRelation("ctas1", true, defaultDataSource, Some(s"file:$tempLocation/c1")) - sql("DROP TABLE ctas1") + withTable("ctas1") { + sql(s"CREATE TABLE ctas1 LOCATION 'file:$tempLocation/c1'" + + " AS SELECT key k, value FROM src ORDER BY k, value") + checkRelation( + "ctas1", isDataSourceTable = true, defaultDataSource, Some(s"file:$tempLocation/c1")) + } - sql(s"CREATE TABLE ctas1 LOCATION 'file:$tempLocation/c2'" + - " AS SELECT key k, value FROM src ORDER BY k, value") - checkRelation("ctas1", true, defaultDataSource, Some(s"file:$tempLocation/c2")) - sql("DROP TABLE ctas1") + withTable("ctas1") { + sql(s"CREATE TABLE ctas1 LOCATION 'file:$tempLocation/c2'" + + " AS SELECT key k, value FROM src ORDER BY k, value") + checkRelation( + "ctas1", isDataSourceTable = true, defaultDataSource, Some(s"file:$tempLocation/c2")) + } - sql(s"CREATE TABLE ctas1 stored as textfile LOCATION 'file:$tempLocation/c3'" + - " AS SELECT key k, value FROM src ORDER BY k, value") - checkRelation("ctas1", false, "text", 
Some(s"file:$tempLocation/c3")) - sql("DROP TABLE ctas1") + withTable("ctas1") { + sql(s"CREATE TABLE ctas1 stored as textfile LOCATION 'file:$tempLocation/c3'" + + " AS SELECT key k, value FROM src ORDER BY k, value") + checkRelation( + "ctas1", isDataSourceTable = false, "text", Some(s"file:$tempLocation/c3")) + } - sql(s"CREATE TABLE ctas1 stored as sequenceFile LOCATION 'file:$tempLocation/c4'" + - " AS SELECT key k, value FROM src ORDER BY k, value") - checkRelation("ctas1", false, "sequence", Some(s"file:$tempLocation/c4")) - sql("DROP TABLE ctas1") + withTable("ctas1") { + sql(s"CREATE TABLE ctas1 stored as sequenceFile LOCATION 'file:$tempLocation/c4'" + + " AS SELECT key k, value FROM src ORDER BY k, value") + checkRelation( + "ctas1", isDataSourceTable = false, "sequence", Some(s"file:$tempLocation/c4")) + } - sql(s"CREATE TABLE ctas1 stored as rcfile LOCATION 'file:$tempLocation/c5'" + - " AS SELECT key k, value FROM src ORDER BY k, value") - checkRelation("ctas1", false, "rcfile", Some(s"file:$tempLocation/c5")) - sql("DROP TABLE ctas1") + withTable("ctas1") { + sql(s"CREATE TABLE ctas1 stored as rcfile LOCATION 'file:$tempLocation/c5'" + + " AS SELECT key k, value FROM src ORDER BY k, value") + checkRelation( + "ctas1", isDataSourceTable = false, "rcfile", Some(s"file:$tempLocation/c5")) + } } } } @@ -1562,7 +1576,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } test("multi-insert with lateral view") { - withTempView("t1") { + withTempView("source") { spark.range(10) .select(array($"id", $"id" + 1).as("arr"), $"id") .createOrReplaceTempView("source") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala new file mode 100644 index 0000000000000..283037caf4a9b --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala @@ -0,0 +1,387 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.orc + +import java.nio.charset.StandardCharsets +import java.sql.{Date, Timestamp} + +import scala.collection.JavaConverters._ + +import org.apache.hadoop.hive.ql.io.sarg.{PredicateLeaf, SearchArgument} + +import org.apache.spark.sql.{Column, DataFrame} +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.planning.PhysicalOperation +import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, HadoopFsRelation, LogicalRelation} +import org.apache.spark.sql.execution.datasources.orc.OrcTest +import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.types._ + +/** + * A test suite that tests Hive ORC filter API based filter pushdown optimization. + */ +class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { + + override val orcImp: String = "hive" + + private def checkFilterPredicate( + df: DataFrame, + predicate: Predicate, + checker: (SearchArgument) => Unit): Unit = { + val output = predicate.collect { case a: Attribute => a }.distinct + val query = df + .select(output.map(e => Column(e)): _*) + .where(Column(predicate)) + + var maybeRelation: Option[HadoopFsRelation] = None + val maybeAnalyzedPredicate = query.queryExecution.optimizedPlan.collect { + case PhysicalOperation(_, filters, LogicalRelation(orcRelation: HadoopFsRelation, _, _, _)) => + maybeRelation = Some(orcRelation) + filters + }.flatten.reduceLeftOption(_ && _) + assert(maybeAnalyzedPredicate.isDefined, "No filter is analyzed from the given query") + + val (_, selectedFilters, _) = + DataSourceStrategy.selectFilters(maybeRelation.get, maybeAnalyzedPredicate.toSeq) + assert(selectedFilters.nonEmpty, "No filter is pushed down") + + val maybeFilter = OrcFilters.createFilter(query.schema, selectedFilters.toArray) + assert(maybeFilter.isDefined, s"Couldn't generate filter predicate for $selectedFilters") + checker(maybeFilter.get) + } + + private def checkFilterPredicate + (predicate: Predicate, filterOperator: PredicateLeaf.Operator) + (implicit df: DataFrame): Unit = { + def checkComparisonOperator(filter: SearchArgument) = { + val operator = filter.getLeaves.asScala + assert(operator.map(_.getOperator).contains(filterOperator)) + } + checkFilterPredicate(df, predicate, checkComparisonOperator) + } + + private def checkFilterPredicate + (predicate: Predicate, stringExpr: String) + (implicit df: DataFrame): Unit = { + def checkLogicalOperator(filter: SearchArgument) = { + assert(filter.toString == stringExpr) + } + checkFilterPredicate(df, predicate, checkLogicalOperator) + } + + private def checkNoFilterPredicate + (predicate: Predicate) + (implicit df: DataFrame): Unit = { + val output = predicate.collect { case a: Attribute => a }.distinct + val query = df + .select(output.map(e => Column(e)): _*) + .where(Column(predicate)) + + var maybeRelation: Option[HadoopFsRelation] = None + val maybeAnalyzedPredicate = query.queryExecution.optimizedPlan.collect { + case PhysicalOperation(_, filters, LogicalRelation(orcRelation: HadoopFsRelation, _, _, _)) => + maybeRelation = Some(orcRelation) + filters + }.flatten.reduceLeftOption(_ && _) + assert(maybeAnalyzedPredicate.isDefined, "No filter is analyzed from the given query") + + val (_, selectedFilters, _) = + DataSourceStrategy.selectFilters(maybeRelation.get, maybeAnalyzedPredicate.toSeq) + assert(selectedFilters.nonEmpty, "No filter is pushed down") + + val maybeFilter = 
OrcFilters.createFilter(query.schema, selectedFilters.toArray) + assert(maybeFilter.isEmpty, s"Could generate filter predicate for $selectedFilters") + } + + test("filter pushdown - integer") { + withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df => + checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN) + } + } + + test("filter pushdown - long") { + withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toLong)))) { implicit df => + checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN) + } + } + + test("filter pushdown - float") { + withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toFloat)))) { implicit df => + checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN) + } + } + + test("filter pushdown - double") { + withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toDouble)))) { implicit df => + 
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN) + } + } + + test("filter pushdown - string") { + withOrcDataFrame((1 to 4).map(i => Tuple1(i.toString))) { implicit df => + checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate('_1 === "1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate('_1 <=> "1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate('_1 < "2", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate('_1 > "3", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate('_1 <= "1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate('_1 >= "4", PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal("1") === '_1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal("1") <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal("2") > '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal("3") < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal("1") >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal("4") <= '_1, PredicateLeaf.Operator.LESS_THAN) + } + } + + test("filter pushdown - boolean") { + withOrcDataFrame((true :: false :: Nil).map(b => Tuple1.apply(Option(b)))) { implicit df => + checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate('_1 === true, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate('_1 <=> true, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate('_1 < true, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate('_1 > false, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate('_1 <= false, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate('_1 >= false, PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(false) === '_1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(false) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(false) > '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(true) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(true) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(true) <= '_1, PredicateLeaf.Operator.LESS_THAN) + } + } + + test("filter pushdown - decimal") { + withOrcDataFrame((1 to 4).map(i => Tuple1.apply(BigDecimal.valueOf(i)))) { implicit df => + checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate('_1 === BigDecimal.valueOf(1), PredicateLeaf.Operator.EQUALS) + checkFilterPredicate('_1 <=> 
BigDecimal.valueOf(1), PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate('_1 < BigDecimal.valueOf(2), PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate('_1 > BigDecimal.valueOf(3), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate('_1 <= BigDecimal.valueOf(1), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate('_1 >= BigDecimal.valueOf(4), PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate( + Literal(BigDecimal.valueOf(1)) === '_1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate( + Literal(BigDecimal.valueOf(1)) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate( + Literal(BigDecimal.valueOf(2)) > '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate( + Literal(BigDecimal.valueOf(3)) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate( + Literal(BigDecimal.valueOf(1)) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate( + Literal(BigDecimal.valueOf(4)) <= '_1, PredicateLeaf.Operator.LESS_THAN) + } + } + + test("filter pushdown - timestamp") { + val timeString = "2015-08-20 14:57:00" + val timestamps = (1 to 4).map { i => + val milliseconds = Timestamp.valueOf(timeString).getTime + i * 3600 + new Timestamp(milliseconds) + } + withOrcDataFrame(timestamps.map(Tuple1(_))) { implicit df => + checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate('_1 === timestamps(0), PredicateLeaf.Operator.EQUALS) + checkFilterPredicate('_1 <=> timestamps(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate('_1 < timestamps(1), PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate('_1 > timestamps(2), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate('_1 <= timestamps(0), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate('_1 >= timestamps(3), PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(timestamps(0)) === '_1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(timestamps(0)) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(timestamps(1)) > '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(timestamps(2)) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(timestamps(0)) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(timestamps(3)) <= '_1, PredicateLeaf.Operator.LESS_THAN) + } + } + + test("filter pushdown - combinations with logical operators") { + withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df => + // Because `ExpressionTree` is not accessible at Hive 1.2.x, this should be checked + // in string form in order to check filter creation including logical operators + // such as `and`, `or` or `not`. So, this function uses `SearchArgument.toString()` + // to produce string expression and then compare it to given string expression below. + // This might have to be changed after Hive version is upgraded. 
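+ // For example, `'_1.isNotNull` compiles to a single leaf `leaf-0 = (IS_NULL _1)` + // combined into the boolean expression `expr = (not leaf-0)`, as asserted below.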
+ checkFilterPredicate( + '_1.isNotNull, + """leaf-0 = (IS_NULL _1) + |expr = (not leaf-0)""".stripMargin.trim + ) + checkFilterPredicate( + '_1 =!= 1, + """leaf-0 = (IS_NULL _1) + |leaf-1 = (EQUALS _1 1) + |expr = (and (not leaf-0) (not leaf-1))""".stripMargin.trim + ) + checkFilterPredicate( + !('_1 < 4), + """leaf-0 = (IS_NULL _1) + |leaf-1 = (LESS_THAN _1 4) + |expr = (and (not leaf-0) (not leaf-1))""".stripMargin.trim + ) + checkFilterPredicate( + '_1 < 2 || '_1 > 3, + """leaf-0 = (LESS_THAN _1 2) + |leaf-1 = (LESS_THAN_EQUALS _1 3) + |expr = (or leaf-0 (not leaf-1))""".stripMargin.trim + ) + checkFilterPredicate( + '_1 < 2 && '_1 > 3, + """leaf-0 = (IS_NULL _1) + |leaf-1 = (LESS_THAN _1 2) + |leaf-2 = (LESS_THAN_EQUALS _1 3) + |expr = (and (not leaf-0) leaf-1 (not leaf-2))""".stripMargin.trim + ) + } + } + + test("no filter pushdown - non-supported types") { + implicit class IntToBinary(int: Int) { + def b: Array[Byte] = int.toString.getBytes(StandardCharsets.UTF_8) + } + // ArrayType + withOrcDataFrame((1 to 4).map(i => Tuple1(Array(i)))) { implicit df => + checkNoFilterPredicate('_1.isNull) + } + // BinaryType + withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df => + checkNoFilterPredicate('_1 <=> 1.b) + } + // DateType + val stringDate = "2015-01-01" + withOrcDataFrame(Seq(Tuple1(Date.valueOf(stringDate)))) { implicit df => + checkNoFilterPredicate('_1 === Date.valueOf(stringDate)) + } + // MapType + withOrcDataFrame((1 to 4).map(i => Tuple1(Map(i -> i)))) { implicit df => + checkNoFilterPredicate('_1.isNotNull) + } + } + + test("SPARK-12218 Converting conjunctions into ORC SearchArguments") { + import org.apache.spark.sql.sources._ + // The `LessThan` should be converted while the `StringContains` shouldn't + val schema = new StructType( + Array( + StructField("a", IntegerType, nullable = true), + StructField("b", StringType, nullable = true))) + assertResult( + """leaf-0 = (LESS_THAN a 10) + |expr = leaf-0 + """.stripMargin.trim + ) { + OrcFilters.createFilter(schema, Array( + LessThan("a", 10), + StringContains("b", "prefix") + )).get.toString + } + + // The `LessThan` should be converted while the whole inner `And` shouldn't + assertResult( + """leaf-0 = (LESS_THAN a 10) + |expr = leaf-0 + """.stripMargin.trim + ) { + OrcFilters.createFilter(schema, Array( + LessThan("a", 10), + Not(And( + GreaterThan("a", 1), + StringContains("b", "prefix") + )) + )).get.toString + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcPartitionDiscoverySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcPartitionDiscoverySuite.scala new file mode 100644 index 0000000000000..ab9b492f347c6 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcPartitionDiscoverySuite.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.orc + +import org.apache.spark.sql.execution.datasources.orc.OrcPartitionDiscoveryTest +import org.apache.spark.sql.hive.test.TestHiveSingleton + +class HiveOrcPartitionDiscoverySuite extends OrcPartitionDiscoveryTest with TestHiveSingleton { + override val orcImp: String = "hive" +}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala new file mode 100644 index 0000000000000..92b2f069cacd6 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.orc + +import java.io.File + +import com.google.common.io.Files + +import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.catalyst.catalog.HiveTableRelation +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} +import org.apache.spark.sql.execution.datasources.orc.OrcQueryTest +import org.apache.spark.sql.hive.HiveUtils +import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.internal.SQLConf + +class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton { + import testImplicits._ + + override val orcImp: String = "hive" + + test("SPARK-8501: Avoids discovery schema from empty ORC files") { + withTempPath { dir => + val path = dir.getCanonicalPath + + withTable("empty_orc") { + withTempView("empty", "single") { + spark.sql( + s"""CREATE TABLE empty_orc(key INT, value STRING) + |STORED AS ORC + |LOCATION '${dir.toURI}' + """.stripMargin) + + val emptyDF = Seq.empty[(Int, String)].toDF("key", "value").coalesce(1) + emptyDF.createOrReplaceTempView("empty") + + // This creates 1 empty ORC file with Hive ORC SerDe. We are using this trick because + // Spark SQL ORC data source always avoids writing empty ORC files.
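+ // The file written this way contains zero rows, so `spark.read.orc` below cannot + // infer a schema from it until the single-row insert later in this test.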
+ spark.sql( + s"""INSERT INTO TABLE empty_orc + |SELECT key, value FROM empty + """.stripMargin) + + val errorMessage = intercept[AnalysisException] { + spark.read.orc(path) + }.getMessage + + assert(errorMessage.contains("Unable to infer schema for ORC")) + + val singleRowDF = Seq((0, "foo")).toDF("key", "value").coalesce(1) + singleRowDF.createOrReplaceTempView("single") + + spark.sql( + s"""INSERT INTO TABLE empty_orc + |SELECT key, value FROM single + """.stripMargin) + + val df = spark.read.orc(path) + assert(df.schema === singleRowDF.schema.asNullable) + checkAnswer(df, singleRowDF) + } + } + } + } + + test("Verify the ORC conversion parameter: CONVERT_METASTORE_ORC") { + withTempView("single") { + val singleRowDF = Seq((0, "foo")).toDF("key", "value") + singleRowDF.createOrReplaceTempView("single") + + Seq("true", "false").foreach { orcConversion => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> orcConversion) { + withTable("dummy_orc") { + withTempPath { dir => + val path = dir.getCanonicalPath + spark.sql( + s""" + |CREATE TABLE dummy_orc(key INT, value STRING) + |STORED AS ORC + |LOCATION '${dir.toURI}' + """.stripMargin) + + spark.sql( + s""" + |INSERT INTO TABLE dummy_orc + |SELECT key, value FROM single + """.stripMargin) + + val df = spark.sql("SELECT * FROM dummy_orc WHERE key=0") + checkAnswer(df, singleRowDF) + + val queryExecution = df.queryExecution + if (orcConversion == "true") { + queryExecution.analyzed.collectFirst { + case _: LogicalRelation => () + }.getOrElse { + fail(s"Expecting the query plan to convert orc to data sources, " + + s"but got:\n$queryExecution") + } + } else { + queryExecution.analyzed.collectFirst { + case _: HiveTableRelation => () + }.getOrElse { + fail(s"Expecting no conversion from orc to data sources, " + + s"but got:\n$queryExecution") + } + } + } + } + } + } + } + } + + test("converted ORC table supports resolving mixed case field") { + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { + withTable("dummy_orc") { + withTempPath { dir => + val df = spark.range(5).selectExpr("id", "id as valueField", "id as partitionValue") + df.write + .partitionBy("partitionValue") + .mode("overwrite") + .orc(dir.getAbsolutePath) + + spark.sql(s""" + |create external table dummy_orc (id long, valueField long) + |partitioned by (partitionValue int) + |stored as orc + |location "${dir.toURI}"""".stripMargin) + spark.sql(s"msck repair table dummy_orc") + checkAnswer(spark.sql("select * from dummy_orc"), df) + } + } + } + } + + test("SPARK-20728 Make ORCFileFormat configurable between sql/hive and sql/core") { + Seq( + ("native", classOf[org.apache.spark.sql.execution.datasources.orc.OrcFileFormat]), + ("hive", classOf[org.apache.spark.sql.hive.orc.OrcFileFormat])).foreach { + case (orcImpl, format) => + withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> orcImpl) { + withTable("spark_20728") { + sql("CREATE TABLE spark_20728(a INT) USING ORC") + val fileFormat = sql("SELECT * FROM spark_20728").queryExecution.analyzed.collectFirst { + case l: LogicalRelation => + l.relation.asInstanceOf[HadoopFsRelation].fileFormat.getClass + } + assert(fileFormat == Some(format)) + } + } + } + } + + // Since Hive 1.2.1 library code path still has this problem, users may hit this + // when spark.sql.hive.convertMetastoreOrc=false. However, after SPARK-22279, + // Apache Spark with the default configuration doesn't hit this bug. 
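+ // With the conversion enabled, the table declared as (c2, c1) below is matched + // against the file's (c1, c2) schema by column name, hence the expected Row(2, 1).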
+ test("SPARK-22267 Spark SQL incorrectly reads ORC files when column order is different") { + Seq("native", "hive").foreach { orcImpl => + withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> orcImpl) { + withTempPath { f => + val path = f.getCanonicalPath + Seq(1 -> 2).toDF("c1", "c2").write.orc(path) + checkAnswer(spark.read.orc(path), Row(1, 2)) + + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { // default since 2.3.0 + withTable("t") { + sql(s"CREATE EXTERNAL TABLE t(c2 INT, c1 INT) STORED AS ORC LOCATION '$path'") + checkAnswer(spark.table("t"), Row(2, 1)) + } + } + } + } + } + } + + // Since Hive 1.2.1 library code path still has this problem, users may hit this + // when spark.sql.hive.convertMetastoreOrc=false. However, after SPARK-22279, + // Apache Spark with the default configuration doesn't hit this bug. + test("SPARK-19809 NullPointerException on zero-size ORC file") { + Seq("native", "hive").foreach { orcImpl => + withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> orcImpl) { + withTempPath { dir => + withTable("spark_19809") { + sql(s"CREATE TABLE spark_19809(a int) STORED AS ORC LOCATION '$dir'") + Files.touch(new File(s"${dir.getCanonicalPath}", "zero.orc")) + + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { // default since 2.3.0 + checkAnswer(spark.table("spark_19809"), Seq.empty) + } + } + } + } + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala new file mode 100644 index 0000000000000..d556a030e2186 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.orc + +import java.io.File + +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.execution.datasources.orc.OrcSuite +import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.internal.HiveSerDe +import org.apache.spark.util.Utils + +class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { + + override val orcImp: String = "hive" + + override def beforeAll(): Unit = { + super.beforeAll() + + sql( + s"""CREATE EXTERNAL TABLE normal_orc( + | intField INT, + | stringField STRING + |) + |STORED AS ORC + |LOCATION '${orcTableAsDir.toURI}' + """.stripMargin) + + sql( + s"""INSERT INTO TABLE normal_orc + |SELECT intField, stringField FROM orc_temp_table + """.stripMargin) + + spark.sql( + s"""CREATE TEMPORARY VIEW normal_orc_source + |USING org.apache.spark.sql.hive.orc + |OPTIONS ( + | PATH '${new File(orcTableAsDir.getAbsolutePath).toURI}' + |) + """.stripMargin) + + spark.sql( + s"""CREATE TEMPORARY VIEW normal_orc_as_source + |USING org.apache.spark.sql.hive.orc + |OPTIONS ( + | PATH '${new File(orcTableAsDir.getAbsolutePath).toURI}' + |) + """.stripMargin) + } + + test("SPARK-22972: hive orc source") { + val tableName = "normal_orc_as_source_hive" + withTable(tableName) { + sql( + s""" + |CREATE TABLE $tableName + |USING org.apache.spark.sql.hive.orc + |OPTIONS ( + | PATH '${new File(orcTableAsDir.getAbsolutePath).toURI}' + |) + """.stripMargin) + + val tableMetadata = spark.sessionState.catalog.getTableMetadata( + TableIdentifier(tableName)) + assert(tableMetadata.storage.inputFormat == + Option("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat")) + assert(tableMetadata.storage.outputFormat == + Option("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat")) + assert(tableMetadata.storage.serde == + Option("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) + assert(HiveSerDe.sourceToSerDe("org.apache.spark.sql.hive.orc") + .equals(HiveSerDe.sourceToSerDe("orc"))) + assert(HiveSerDe.sourceToSerDe("org.apache.spark.sql.orc") + .equals(HiveSerDe.sourceToSerDe("orc"))) + } + } + + test("SPARK-19459/SPARK-18220: read char/varchar column written by Hive") { + val location = Utils.createTempDir() + val uri = location.toURI + try { + hiveClient.runSqlHive("USE default") + hiveClient.runSqlHive( + """ + |CREATE EXTERNAL TABLE hive_orc( + | a STRING, + | b CHAR(10), + | c VARCHAR(10), + | d ARRAY<CHAR(3)>) + |STORED AS orc""".stripMargin) + // Hive throws an exception if the location is assigned in the CREATE TABLE statement. + hiveClient.runSqlHive( + s"ALTER TABLE hive_orc SET LOCATION '$uri'") + hiveClient.runSqlHive( + """ + |INSERT INTO TABLE hive_orc + |SELECT 'a', 'b', 'c', ARRAY(CAST('d' AS CHAR(3))) + |FROM (SELECT 1) t""".stripMargin) + + // We create a different table in Spark using the same schema which points to + // the same location. + spark.sql( + s""" + |CREATE EXTERNAL TABLE spark_orc( + | a STRING, + | b CHAR(10), + | c VARCHAR(10), + | d ARRAY<CHAR(3)>) + |STORED AS orc + |LOCATION '$uri'""".stripMargin) + val result = Row("a", "b         ", "c", Seq("d  ")) + checkAnswer(spark.table("hive_orc"), result) + checkAnswer(spark.table("spark_orc"), result) + } finally { + hiveClient.runSqlHive("DROP TABLE IF EXISTS hive_orc") + hiveClient.runSqlHive("DROP TABLE IF EXISTS spark_orc") + Utils.deleteRecursively(location) + } + } }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala index ba0a7605da71c..f87162f94c01a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala @@ -30,7 +30,8 @@ import org.apache.spark.sql.types._ class OrcHadoopFsRelationSuite extends HadoopFsRelationTest { import testImplicits._ - override val dataSourceName: String = classOf[OrcFileFormat].getCanonicalName + override val dataSourceName: String = + classOf[org.apache.spark.sql.execution.datasources.orc.OrcFileFormat].getCanonicalName // ORC does not play well with NullType and UDT. override protected def supportsDataType(dataType: DataType): Boolean = dataType match { @@ -116,3 +117,8 @@ class OrcHadoopFsRelationSuite extends HadoopFsRelationTest { } } } + +class HiveOrcHadoopFsRelationSuite extends OrcHadoopFsRelationSuite { + override val dataSourceName: String = + classOf[org.apache.spark.sql.hive.orc.OrcFileFormat].getCanonicalName +}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala new file mode 100644 index 0000000000000..bf6efa7c4c08c --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala @@ -0,0 +1,501 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.orc + +import java.io.File + +import scala.util.{Random, Try} + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{DataFrame, SparkSession} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ +import org.apache.spark.util.{Benchmark, Utils} + + +/** + * Benchmark to measure ORC read performance. + * + * This is in `sql/hive` module in order to compare `sql/core` and `sql/hive` ORC data sources.
+ */ +// scalastyle:off line.size.limit +object OrcReadBenchmark { + val conf = new SparkConf() + conf.set("orc.compression", "snappy") + + private val spark = SparkSession.builder() + .master("local[1]") + .appName("OrcReadBenchmark") + .config(conf) + .getOrCreate() + + // Set default configs. Individual cases will change them if necessary. + spark.conf.set(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key, "true") + + def withTempPath(f: File => Unit): Unit = { + val path = Utils.createTempDir() + path.delete() + try f(path) finally Utils.deleteRecursively(path) + } + + def withTempTable(tableNames: String*)(f: => Unit): Unit = { + try f finally tableNames.foreach(spark.catalog.dropTempView) + } + + def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = { + val (keys, values) = pairs.unzip + val currentValues = keys.map(key => Try(spark.conf.get(key)).toOption) + (keys, values).zipped.foreach(spark.conf.set) + try f finally { + keys.zip(currentValues).foreach { + case (key, Some(value)) => spark.conf.set(key, value) + case (key, None) => spark.conf.unset(key) + } + } + } + + private val NATIVE_ORC_FORMAT = classOf[org.apache.spark.sql.execution.datasources.orc.OrcFileFormat].getCanonicalName + private val HIVE_ORC_FORMAT = classOf[org.apache.spark.sql.hive.orc.OrcFileFormat].getCanonicalName + + private def prepareTable(dir: File, df: DataFrame, partition: Option[String] = None): Unit = { + val dirORC = dir.getCanonicalPath + + if (partition.isDefined) { + df.write.partitionBy(partition.get).orc(dirORC) + } else { + df.write.orc(dirORC) + } + + spark.read.format(NATIVE_ORC_FORMAT).load(dirORC).createOrReplaceTempView("nativeOrcTable") + spark.read.format(HIVE_ORC_FORMAT).load(dirORC).createOrReplaceTempView("hiveOrcTable") + } + + def numericScanBenchmark(values: Int, dataType: DataType): Unit = { + val benchmark = new Benchmark(s"SQL Single ${dataType.sql} Column Scan", values) + + withTempPath { dir => + withTempTable("t1", "nativeOrcTable", "hiveOrcTable") { + import spark.implicits._ + spark.range(values).map(_ => Random.nextLong).createOrReplaceTempView("t1") + + prepareTable(dir, spark.sql(s"SELECT CAST(value as ${dataType.sql}) id FROM t1")) + + benchmark.addCase("Native ORC MR") { _ => + withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { + spark.sql("SELECT sum(id) FROM nativeOrcTable").collect() + } + } + + benchmark.addCase("Native ORC Vectorized") { _ => + spark.sql("SELECT sum(id) FROM nativeOrcTable").collect() + } + + benchmark.addCase("Native ORC Vectorized with copy") { _ => + withSQLConf(SQLConf.ORC_COPY_BATCH_TO_SPARK.key -> "true") { + spark.sql("SELECT sum(id) FROM nativeOrcTable").collect() + } + } + + benchmark.addCase("Hive built-in ORC") { _ => + spark.sql("SELECT sum(id) FROM hiveOrcTable").collect() + } + + /* + Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1 + Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz + + SQL Single TINYINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + Native ORC MR 1135 / 1171 13.9 72.2 1.0X + Native ORC Vectorized 152 / 163 103.4 9.7 7.5X + Native ORC Vectorized with copy 149 / 162 105.4 9.5 7.6X + Hive built-in ORC 1380 / 1384 11.4 87.7 0.8X + + SQL Single SMALLINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + Native ORC MR 1182 / 1244 13.3 75.2 1.0X + Native ORC Vectorized 145 
/ 156 108.7 9.2 8.2X + Native ORC Vectorized with copy 148 / 158 106.4 9.4 8.0X + Hive built-in ORC 1591 / 1636 9.9 101.2 0.7X + + SQL Single INT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + Native ORC MR 1271 / 1271 12.4 80.8 1.0X + Native ORC Vectorized 206 / 212 76.3 13.1 6.2X + Native ORC Vectorized with copy 200 / 213 78.8 12.7 6.4X + Hive built-in ORC 1776 / 1787 8.9 112.9 0.7X + + SQL Single BIGINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + Native ORC MR 1344 / 1355 11.7 85.4 1.0X + Native ORC Vectorized 258 / 268 61.0 16.4 5.2X + Native ORC Vectorized with copy 252 / 257 62.4 16.0 5.3X + Hive built-in ORC 1818 / 1823 8.7 115.6 0.7X + + SQL Single FLOAT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + Native ORC MR 1333 / 1352 11.8 84.8 1.0X + Native ORC Vectorized 310 / 324 50.7 19.7 4.3X + Native ORC Vectorized with copy 312 / 320 50.4 19.9 4.3X + Hive built-in ORC 1904 / 1918 8.3 121.0 0.7X + + SQL Single DOUBLE Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + Native ORC MR 1408 / 1585 11.2 89.5 1.0X + Native ORC Vectorized 359 / 368 43.8 22.8 3.9X + Native ORC Vectorized with copy 364 / 371 43.2 23.2 3.9X + Hive built-in ORC 1881 / 1954 8.4 119.6 0.7X + */ + benchmark.run() + } + } + } + + def intStringScanBenchmark(values: Int): Unit = { + val benchmark = new Benchmark("Int and String Scan", values) + + withTempPath { dir => + withTempTable("t1", "nativeOrcTable", "hiveOrcTable") { + import spark.implicits._ + spark.range(values).map(_ => Random.nextLong).createOrReplaceTempView("t1") + + prepareTable( + dir, + spark.sql("SELECT CAST(value AS INT) AS c1, CAST(value as STRING) AS c2 FROM t1")) + + benchmark.addCase("Native ORC MR") { _ => + withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { + spark.sql("SELECT sum(c1), sum(length(c2)) FROM nativeOrcTable").collect() + } + } + + benchmark.addCase("Native ORC Vectorized") { _ => + spark.sql("SELECT sum(c1), sum(length(c2)) FROM nativeOrcTable").collect() + } + + benchmark.addCase("Native ORC Vectorized with copy") { _ => + withSQLConf(SQLConf.ORC_COPY_BATCH_TO_SPARK.key -> "true") { + spark.sql("SELECT sum(c1), sum(length(c2)) FROM nativeOrcTable").collect() + } + } + + benchmark.addCase("Hive built-in ORC") { _ => + spark.sql("SELECT sum(c1), sum(length(c2)) FROM hiveOrcTable").collect() + } + + /* + Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1 + Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz + + Int and String Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + Native ORC MR 2566 / 2592 4.1 244.7 1.0X + Native ORC Vectorized 1098 / 1113 9.6 104.7 2.3X + Native ORC Vectorized with copy 1527 / 1593 6.9 145.6 1.7X + Hive built-in ORC 3561 / 3705 2.9 339.6 0.7X + */ + benchmark.run() + } + } + } + + def partitionTableScanBenchmark(values: Int): Unit = { + val benchmark = new Benchmark("Partitioned Table", values) + + withTempPath { dir => + withTempTable("t1", "nativeOrcTable", "hiveOrcTable") { + import spark.implicits._ + 
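// Random long values (rather than sequential ids) make the scanned column resistant to + // run-length encoding, so the implementations are compared on realistic data. +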
spark.range(values).map(_ => Random.nextLong).createOrReplaceTempView("t1") + + prepareTable(dir, spark.sql("SELECT value % 2 AS p, value AS id FROM t1"), Some("p")) + + benchmark.addCase("Data column - Native ORC MR") { _ => + withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { + spark.sql("SELECT sum(id) FROM nativeOrcTable").collect() + } + } + + benchmark.addCase("Data column - Native ORC Vectorized") { _ => + spark.sql("SELECT sum(id) FROM nativeOrcTable").collect() + } + + benchmark.addCase("Data column - Native ORC Vectorized with copy") { _ => + withSQLConf(SQLConf.ORC_COPY_BATCH_TO_SPARK.key -> "true") { + spark.sql("SELECT sum(id) FROM nativeOrcTable").collect() + } + } + + benchmark.addCase("Data column - Hive built-in ORC") { _ => + spark.sql("SELECT sum(id) FROM hiveOrcTable").collect() + } + + benchmark.addCase("Partition column - Native ORC MR") { _ => + withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { + spark.sql("SELECT sum(p) FROM nativeOrcTable").collect() + } + } + + benchmark.addCase("Partition column - Native ORC Vectorized") { _ => + spark.sql("SELECT sum(p) FROM nativeOrcTable").collect() + } + + benchmark.addCase("Partition column - Native ORC Vectorized with copy") { _ => + withSQLConf(SQLConf.ORC_COPY_BATCH_TO_SPARK.key -> "true") { + spark.sql("SELECT sum(p) FROM nativeOrcTable").collect() + } + } + + benchmark.addCase("Partition column - Hive built-in ORC") { _ => + spark.sql("SELECT sum(p) FROM hiveOrcTable").collect() + } + + benchmark.addCase("Both columns - Native ORC MR") { _ => + withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { + spark.sql("SELECT sum(p), sum(id) FROM nativeOrcTable").collect() + } + } + + benchmark.addCase("Both columns - Native ORC Vectorized") { _ => + spark.sql("SELECT sum(p), sum(id) FROM nativeOrcTable").collect() + } + + benchmark.addCase("Both column - Native ORC Vectorized with copy") { _ => + withSQLConf(SQLConf.ORC_COPY_BATCH_TO_SPARK.key -> "true") { + spark.sql("SELECT sum(p), sum(id) FROM nativeOrcTable").collect() + } + } + + benchmark.addCase("Both columns - Hive built-in ORC") { _ => + spark.sql("SELECT sum(p), sum(id) FROM hiveOrcTable").collect() + } + + /* + Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1 + Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz + + Partitioned Table: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + Data only - Native ORC MR 1447 / 1457 10.9 92.0 1.0X + Data only - Native ORC Vectorized 256 / 266 61.4 16.3 5.6X + Data only - Native ORC Vectorized with copy 263 / 273 59.8 16.7 5.5X + Data only - Hive built-in ORC 1960 / 1988 8.0 124.6 0.7X + Partition only - Native ORC MR 1039 / 1043 15.1 66.0 1.4X + Partition only - Native ORC Vectorized 48 / 53 326.6 3.1 30.1X + Partition only - Native ORC Vectorized with copy 48 / 53 328.4 3.0 30.2X + Partition only - Hive built-in ORC 1234 / 1242 12.7 78.4 1.2X + Both columns - Native ORC MR 1465 / 1475 10.7 93.1 1.0X + Both columns - Native ORC Vectorized 292 / 301 53.9 18.6 5.0X + Both column - Native ORC Vectorized with copy 348 / 354 45.1 22.2 4.2X + Both columns - Hive built-in ORC 2051 / 2060 7.7 130.4 0.7X + */ + benchmark.run() + } + } + } + + def repeatedStringScanBenchmark(values: Int): Unit = { + val benchmark = new Benchmark("Repeated String", values) + + withTempPath { dir => + withTempTable("t1", "nativeOrcTable", "hiveOrcTable") { + 
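// Casting (id % 200) + 10000 to STRING below yields only 200 distinct values, so + // this case measures scans over a dictionary-encoded string column. +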
spark.range(values).createOrReplaceTempView("t1") + + prepareTable(dir, spark.sql("SELECT CAST((id % 200) + 10000 as STRING) AS c1 FROM t1")) + + benchmark.addCase("Native ORC MR") { _ => + withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { + spark.sql("SELECT sum(length(c1)) FROM nativeOrcTable").collect() + } + } + + benchmark.addCase("Native ORC Vectorized") { _ => + spark.sql("SELECT sum(length(c1)) FROM nativeOrcTable").collect() + } + + benchmark.addCase("Native ORC Vectorized with copy") { _ => + withSQLConf(SQLConf.ORC_COPY_BATCH_TO_SPARK.key -> "true") { + spark.sql("SELECT sum(length(c1)) FROM nativeOrcTable").collect() + } + } + + benchmark.addCase("Hive built-in ORC") { _ => + spark.sql("SELECT sum(length(c1)) FROM hiveOrcTable").collect() + } + + /* + Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1 + Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz + + Repeated String: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + Native ORC MR 1271 / 1278 8.3 121.2 1.0X + Native ORC Vectorized 200 / 212 52.4 19.1 6.4X + Native ORC Vectorized with copy 342 / 347 30.7 32.6 3.7X + Hive built-in ORC 1874 / 2105 5.6 178.7 0.7X + */ + benchmark.run() + } + } + } + + def stringWithNullsScanBenchmark(values: Int, fractionOfNulls: Double): Unit = { + withTempPath { dir => + withTempTable("t1", "nativeOrcTable", "hiveOrcTable") { + spark.range(values).createOrReplaceTempView("t1") + + prepareTable( + dir, + spark.sql( + s"SELECT IF(RAND(1) < $fractionOfNulls, NULL, CAST(id as STRING)) AS c1, " + + s"IF(RAND(2) < $fractionOfNulls, NULL, CAST(id as STRING)) AS c2 FROM t1")) + + val benchmark = new Benchmark(s"String with Nulls Scan ($fractionOfNulls%)", values) + + benchmark.addCase("Native ORC MR") { _ => + withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { + spark.sql("SELECT SUM(LENGTH(c2)) FROM nativeOrcTable " + + "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").collect() + } + } + + benchmark.addCase("Native ORC Vectorized") { _ => + spark.sql("SELECT SUM(LENGTH(c2)) FROM nativeOrcTable " + + "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").collect() + } + + benchmark.addCase("Native ORC Vectorized with copy") { _ => + withSQLConf(SQLConf.ORC_COPY_BATCH_TO_SPARK.key -> "true") { + spark.sql("SELECT SUM(LENGTH(c2)) FROM nativeOrcTable " + + "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").collect() + } + } + + benchmark.addCase("Hive built-in ORC") { _ => + spark.sql("SELECT SUM(LENGTH(c2)) FROM hiveOrcTable " + + "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").collect() + } + + /* + Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1 + Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz + + String with Nulls Scan (0.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + Native ORC MR 2394 / 2886 4.4 228.3 1.0X + Native ORC Vectorized 699 / 729 15.0 66.7 3.4X + Native ORC Vectorized with copy 959 / 1025 10.9 91.5 2.5X + Hive built-in ORC 3899 / 3901 2.7 371.9 0.6X + + String with Nulls Scan (0.5%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + Native ORC MR 2234 / 2255 4.7 213.1 1.0X + Native ORC Vectorized 854 / 869 12.3 81.4 2.6X + Native ORC Vectorized with copy 1099 / 1128 9.5 104.8 2.0X + Hive built-in ORC 2767 / 2793 3.8 263.9 0.8X + + String with Nulls Scan 
(0.95%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + Native ORC MR 1166 / 1202 9.0 111.2 1.0X + Native ORC Vectorized 338 / 345 31.1 32.2 3.5X + Native ORC Vectorized with copy 418 / 428 25.1 39.9 2.8X + Hive built-in ORC 1730 / 1761 6.1 164.9 0.7X + */ + benchmark.run() + } + } + } + + def columnsBenchmark(values: Int, width: Int): Unit = { + val benchmark = new Benchmark(s"Single Column Scan from $width columns", values) + + withTempPath { dir => + withTempTable("t1", "nativeOrcTable", "hiveOrcTable") { + import spark.implicits._ + val middle = width / 2 + val selectExpr = (1 to width).map(i => s"value as c$i") + spark.range(values).map(_ => Random.nextLong).toDF() + .selectExpr(selectExpr: _*).createOrReplaceTempView("t1") + + prepareTable(dir, spark.sql("SELECT * FROM t1")) + + benchmark.addCase("Native ORC MR") { _ => + withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { + spark.sql(s"SELECT sum(c$middle) FROM nativeOrcTable").collect() + } + } + + benchmark.addCase("Native ORC Vectorized") { _ => + spark.sql(s"SELECT sum(c$middle) FROM nativeOrcTable").collect() + } + + benchmark.addCase("Native ORC Vectorized with copy") { _ => + withSQLConf(SQLConf.ORC_COPY_BATCH_TO_SPARK.key -> "true") { + spark.sql(s"SELECT sum(c$middle) FROM nativeOrcTable").collect() + } + } + + benchmark.addCase("Hive built-in ORC") { _ => + spark.sql(s"SELECT sum(c$middle) FROM hiveOrcTable").collect() + } + + /* + Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1 + Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz + + Single Column Scan from 100 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + Native ORC MR 1050 / 1053 1.0 1001.1 1.0X + Native ORC Vectorized 95 / 101 11.0 90.9 11.0X + Native ORC Vectorized with copy 95 / 102 11.0 90.9 11.0X + Hive built-in ORC 348 / 358 3.0 331.8 3.0X + + Single Column Scan from 200 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + Native ORC MR 2099 / 2108 0.5 2002.1 1.0X + Native ORC Vectorized 179 / 187 5.8 171.1 11.7X + Native ORC Vectorized with copy 176 / 188 6.0 167.6 11.9X + Hive built-in ORC 562 / 581 1.9 535.9 3.7X + + Single Column Scan from 300 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------------ + Native ORC MR 3221 / 3246 0.3 3071.4 1.0X + Native ORC Vectorized 312 / 322 3.4 298.0 10.3X + Native ORC Vectorized with copy 306 / 320 3.4 291.6 10.5X + Hive built-in ORC 815 / 824 1.3 777.3 4.0X + */ + benchmark.run() + } + } + } + + def main(args: Array[String]): Unit = { + Seq(ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType).foreach { dataType => + numericScanBenchmark(1024 * 1024 * 15, dataType) + } + intStringScanBenchmark(1024 * 1024 * 10) + partitionTableScanBenchmark(1024 * 1024 * 15) + repeatedStringScanBenchmark(1024 * 1024 * 10) + for (fractionOfNulls <- List(0.0, 0.50, 0.95)) { + stringWithNullsScanBenchmark(1024 * 1024 * 10, fractionOfNulls) + } + columnsBenchmark(1024 * 1024 * 1, 100) + columnsBenchmark(1024 * 1024 * 1, 200) + columnsBenchmark(1024 * 1024 * 1, 300) + } +} +// scalastyle:on line.size.limit diff --git a/streaming/pom.xml b/streaming/pom.xml index fea882ad11230..4497e53b65984 
100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.3.0-SNAPSHOT + 2.4.0-SNAPSHOT ../pom.xml diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/RawInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/RawInputDStream.scala index b2ec33e82ddaa..b22bbb79a5cc9 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/RawInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/RawInputDStream.scala @@ -98,7 +98,7 @@ class RawNetworkReceiver(host: String, port: Int, storageLevel: StorageLevel) /** Read a buffer fully from a given Channel */ private def readFully(channel: ReadableByteChannel, dest: ByteBuffer) { - while (dest.position < dest.limit) { + while (dest.position() < dest.limit()) { if (channel.read(dest) == -1) { throw new EOFException("End of channel") } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala b/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala index 15d3c7e54b8dd..8da5a5f8193cf 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala @@ -162,7 +162,7 @@ private[streaming] class MapWithStateRDD[K: ClassTag, V: ClassTag, S: ClassTag, mappingFunction, batchTime, timeoutThresholdTime, - removeTimedoutData = doFullScan // remove timedout data only when full scan is enabled + removeTimedoutData = doFullScan // remove timed-out data only when full scan is enabled ) Iterator(newRecord) } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala index 69e15655ad790..6748dd4ec48e3 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala @@ -23,16 +23,16 @@ import scala.xml._ import org.apache.commons.lang3.StringEscapeUtils +import org.apache.spark.status.api.v1.{JobData, StageData} import org.apache.spark.streaming.Time import org.apache.spark.streaming.ui.StreamingJobProgressListener.SparkJobId import org.apache.spark.ui.{UIUtils => SparkUIUtils, WebUIPage} -import org.apache.spark.ui.jobs.UIData.JobUIData -private[ui] case class SparkJobIdWithUIData(sparkJobId: SparkJobId, jobUIData: Option[JobUIData]) +private[ui] case class SparkJobIdWithUIData(sparkJobId: SparkJobId, jobData: Option[JobData]) private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { private val streamingListener = parent.listener - private val sparkListener = parent.ssc.sc.jobProgressListener + private val store = parent.parent.store private def columns: Seq[Node] = { Output Op Id @@ -52,13 +52,13 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { formattedOutputOpDuration: String, numSparkJobRowsInOutputOp: Int, isFirstRow: Boolean, - sparkJob: SparkJobIdWithUIData): Seq[Node] = { - if (sparkJob.jobUIData.isDefined) { + jobIdWithData: SparkJobIdWithUIData): Seq[Node] = { + if (jobIdWithData.jobData.isDefined) { generateNormalJobRow(outputOpData, outputOpDescription, formattedOutputOpDuration, - numSparkJobRowsInOutputOp, isFirstRow, sparkJob.jobUIData.get) + numSparkJobRowsInOutputOp, isFirstRow, jobIdWithData.jobData.get) } else { generateDroppedJobRow(outputOpData, outputOpDescription, formattedOutputOpDuration, - 
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala
index 69e15655ad790..6748dd4ec48e3 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala
@@ -23,16 +23,16 @@ import scala.xml._

 import org.apache.commons.lang3.StringEscapeUtils

+import org.apache.spark.status.api.v1.{JobData, StageData}
 import org.apache.spark.streaming.Time
 import org.apache.spark.streaming.ui.StreamingJobProgressListener.SparkJobId
 import org.apache.spark.ui.{UIUtils => SparkUIUtils, WebUIPage}
-import org.apache.spark.ui.jobs.UIData.JobUIData

-private[ui] case class SparkJobIdWithUIData(sparkJobId: SparkJobId, jobUIData: Option[JobUIData])
+private[ui] case class SparkJobIdWithUIData(sparkJobId: SparkJobId, jobData: Option[JobData])

 private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") {
   private val streamingListener = parent.listener
-  private val sparkListener = parent.ssc.sc.jobProgressListener
+  private val store = parent.parent.store

   private def columns: Seq[Node] = {
     <th>Output Op Id</th>
@@ -52,13 +52,13 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") {
       formattedOutputOpDuration: String,
       numSparkJobRowsInOutputOp: Int,
       isFirstRow: Boolean,
-      sparkJob: SparkJobIdWithUIData): Seq[Node] = {
-    if (sparkJob.jobUIData.isDefined) {
+      jobIdWithData: SparkJobIdWithUIData): Seq[Node] = {
+    if (jobIdWithData.jobData.isDefined) {
       generateNormalJobRow(outputOpData, outputOpDescription, formattedOutputOpDuration,
-        numSparkJobRowsInOutputOp, isFirstRow, sparkJob.jobUIData.get)
+        numSparkJobRowsInOutputOp, isFirstRow, jobIdWithData.jobData.get)
     } else {
       generateDroppedJobRow(outputOpData, outputOpDescription, formattedOutputOpDuration,
-        numSparkJobRowsInOutputOp, isFirstRow, sparkJob.sparkJobId)
+        numSparkJobRowsInOutputOp, isFirstRow, jobIdWithData.sparkJobId)
     }
   }
@@ -94,15 +94,15 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") {
       formattedOutputOpDuration: String,
       numSparkJobRowsInOutputOp: Int,
       isFirstRow: Boolean,
-      sparkJob: JobUIData): Seq[Node] = {
+      sparkJob: JobData): Seq[Node] = {
     val duration: Option[Long] = {
       sparkJob.submissionTime.map { start =>
-        val end = sparkJob.completionTime.getOrElse(System.currentTimeMillis())
-        end - start
+        val end = sparkJob.completionTime.map(_.getTime()).getOrElse(System.currentTimeMillis())
+        end - start.getTime()
       }
     }
     val lastFailureReason =
-      sparkJob.stageIds.sorted.reverse.flatMap(sparkListener.stageIdToInfo.get).
+      sparkJob.stageIds.sorted.reverse.flatMap(getStageData).
       dropWhile(_.failureReason == None).take(1). // get the first info that contains failure
       flatMap(info => info.failureReason).headOption.getOrElse("")
     val formattedDuration = duration.map(d => SparkUIUtils.formatDuration(d)).getOrElse("-")
@@ -135,7 +135,7 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") {
       {formattedDuration}
-        {sparkJob.completedStageIndices.size}/{sparkJob.stageIds.size - sparkJob.numSkippedStages}
+        {sparkJob.numCompletedStages}/{sparkJob.stageIds.size - sparkJob.numSkippedStages}
         {if (sparkJob.numFailedStages > 0) s"(${sparkJob.numFailedStages} failed)"}
         {if (sparkJob.numSkippedStages > 0) s"(${sparkJob.numSkippedStages} skipped)"}
@@ -146,7 +146,7 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") {
         completed = sparkJob.numCompletedTasks,
         failed = sparkJob.numFailedTasks,
         skipped = sparkJob.numSkippedTasks,
-        reasonToNumKilled = sparkJob.reasonToNumKilled,
+        reasonToNumKilled = sparkJob.killedTasksSummary,
         total = sparkJob.numTasks - sparkJob.numSkippedTasks)
     }
@@ -246,11 +246,19 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") {
   }

-  private def getJobData(sparkJobId: SparkJobId): Option[JobUIData] = {
-    sparkListener.activeJobs.get(sparkJobId).orElse {
-      sparkListener.completedJobs.find(_.jobId == sparkJobId).orElse {
-        sparkListener.failedJobs.find(_.jobId == sparkJobId)
-      }
+  private def getJobData(sparkJobId: SparkJobId): Option[JobData] = {
+    try {
+      Some(store.job(sparkJobId))
+    } catch {
+      case _: NoSuchElementException => None
+    }
+  }
+
+  private def getStageData(stageId: Int): Option[StageData] = {
+    try {
+      Some(store.lastStageAttempt(stageId))
+    } catch {
+      case _: NoSuchElementException => None
     }
   }
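The new getJobData/getStageData helpers replace direct lookups in the listener's in-memory maps with reads from the app status store, which reports a missing key by throwing NoSuchElementException; the helpers fold that exception into an Option. The same pattern in isolation, against a stand-in store (Store and this JobData are illustrative stand-ins, not Spark's actual classes):

final case class JobData(jobId: Int, name: String)

// Stand-in for a key-value status store that throws on missing keys.
class Store(jobs: Map[Int, JobData]) {
  def job(id: Int): JobData =
    jobs.getOrElse(id, throw new NoSuchElementException(s"no job $id"))
}

object StoreLookupDemo {
  def getJobData(store: Store, id: Int): Option[JobData] =
    try {
      Some(store.job(id))
    } catch {
      case _: NoSuchElementException => None
    }

  def main(args: Array[String]): Unit = {
    val store = new Store(Map(1 -> JobData(1, "count")))
    println(getJobData(store, 1)) // Some(JobData(1,count))
    println(getJobData(store, 2)) // None
  }
}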
@@ -282,25 +290,22 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") {
         val sparkJobIds = outputOpIdToSparkJobIds.getOrElse(outputOpId, Seq.empty)
         (outputOperation, sparkJobIds)
       }.toSeq.sortBy(_._1.id)
-    sparkListener.synchronized {
-      val outputOpWithJobs = outputOps.map { case (outputOpData, sparkJobIds) =>
-        (outputOpData,
-          sparkJobIds.map(sparkJobId => SparkJobIdWithUIData(sparkJobId, getJobData(sparkJobId))))
-      }
+    val outputOpWithJobs = outputOps.map { case (outputOpData, sparkJobIds) =>
+      (outputOpData, sparkJobIds.map { jobId => SparkJobIdWithUIData(jobId, getJobData(jobId)) })
+    }

-      <table id="batch-job-table" class="table table-bordered table-striped table-condensed">
-        <thead>
-          {columns}
-        </thead>
-        <tbody>
-          {
-            outputOpWithJobs.map { case (outputOpData, sparkJobIds) =>
-              generateOutputOpIdRow(outputOpData, sparkJobIds)
-            }
+    <table id="batch-job-table" class="table table-bordered table-striped table-condensed">
+      <thead>
+        {columns}
+      </thead>
+      <tbody>
+        {
+          outputOpWithJobs.map { case (outputOpData, sparkJobs) =>
+            generateOutputOpIdRow(outputOpData, sparkJobs)
           }
-        </tbody>
-      </table>
-    }
+        }
+      </tbody>
+    </table>
   }

   def render(request: HttpServletRequest): Seq[Node] = streamingListener.synchronized {
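The braces inside the table markup above are Scala XML literal splices: any expression yielding a Node or Seq[Node] can be embedded in {}, which is how {columns} and the mapped job rows land inside <thead> and <tbody>. A tiny standalone illustration, not from the patch (requires the scala-xml module that ships with Scala 2.11):

import scala.xml.Node

object XmlSpliceDemo {
  def columns: Seq[Node] = <th>Output Op Id</th>

  def main(args: Array[String]): Unit = {
    // Each mapped element becomes a row node, spliced into <tbody> below.
    val rows = Seq("a", "b").map(v => <tr><td>{v}</td></tr>)
    val table =
      <table>
        <thead><tr>{columns}</tr></thead>
        <tbody>{rows}</tbody>
      </table>
    println(table)
  }
}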
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala
index 6a3b3200dccdb..a6997359d64d2 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala
@@ -29,7 +29,9 @@ private[streaming] object HdfsUtils {
     // If the file exists and we have append support, append instead of creating a new file
     val stream: FSDataOutputStream = {
       if (dfs.isFile(dfsPath)) {
-        if (conf.getBoolean("hdfs.append.support", false) || dfs.isInstanceOf[RawLocalFileSystem]) {
+        if (conf.getBoolean("dfs.support.append", true) ||
+            conf.getBoolean("hdfs.append.support", false) ||
+            dfs.isInstanceOf[RawLocalFileSystem]) {
           dfs.append(dfsPath)
         } else {
           throw new IllegalStateException("File exists and there is no append support!")
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala
index 3a21cfae5ac2f..89524cd84ff32 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala
@@ -364,7 +364,7 @@ private[streaming] object OpenHashMapBasedStateMap {
   }

   /**
-   * Internal class to represent a marker the demarkate the end of all state data in the
+   * Internal class to represent a marker that demarcates the end of all state data in the
    * serialized bytes.
    */
   class LimitMarker(val num: Int) extends Serializable
diff --git a/tools/pom.xml b/tools/pom.xml
index 37427e8da62d8..242219e29f50f 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.3.0-SNAPSHOT</version>
+    <version>2.4.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
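The HdfsUtils hunk above widens the append-support check: before falling back to the legacy "hdfs.append.support" key or the RawLocalFileSystem escape hatch, it now consults Hadoop's own "dfs.support.append" key, read with a default of true. The predicate in isolation, as a runnable sketch (supportsAppend is an illustrative helper name, not from the patch):

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, RawLocalFileSystem}

object AppendCheckDemo {
  // Mirrors the condition introduced in the HdfsUtils hunk.
  def supportsAppend(conf: Configuration, fs: FileSystem): Boolean =
    conf.getBoolean("dfs.support.append", true) ||
      conf.getBoolean("hdfs.append.support", false) ||
      fs.isInstanceOf[RawLocalFileSystem]

  def main(args: Array[String]): Unit = {
    val conf = new Configuration()
    val fs = FileSystem.getLocal(conf)
    println(s"append supported: ${supportsAppend(conf, fs)}") // true with default config
  }
}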