Skip to content

Commit eccb9fb

Browse files
committed
Revert "[SPARK-4508] [SQL] build native date type to conform behavior to Hive"
This reverts commit 1646f89.
1 parent cfea300 commit eccb9fb

File tree

49 files changed

+112
-191
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

49 files changed

+112
-191
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ package org.apache.spark.sql
2020
import scala.util.hashing.MurmurHash3
2121

2222
import org.apache.spark.sql.catalyst.expressions.GenericRow
23-
import org.apache.spark.sql.types.DateUtils
23+
2424

2525
object Row {
2626
/**

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,14 @@
1717

1818
package org.apache.spark.sql.catalyst
1919

20-
import java.sql.Timestamp
20+
import java.sql.{Date, Timestamp}
2121

2222
import org.apache.spark.util.Utils
2323
import org.apache.spark.sql.catalyst.expressions.{GenericRow, Attribute, AttributeReference, Row}
2424
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
2525
import org.apache.spark.sql.types._
2626

27+
2728
/**
2829
* A default version of ScalaReflection that uses the runtime universe.
2930
*/
@@ -71,7 +72,6 @@ trait ScalaReflection {
7172
}.toArray)
7273
case (d: BigDecimal, _) => Decimal(d)
7374
case (d: java.math.BigDecimal, _) => Decimal(d)
74-
case (d: java.sql.Date, _) => DateUtils.fromJavaDate(d)
7575
case (other, _) => other
7676
}
7777

@@ -85,7 +85,6 @@ trait ScalaReflection {
8585
}
8686
case (r: Row, s: StructType) => convertRowToScala(r, s)
8787
case (d: Decimal, _: DecimalType) => d.toJavaBigDecimal
88-
case (i: Int, DateType) => DateUtils.toJavaDate(i)
8988
case (other, _) => other
9089
}
9190

@@ -160,7 +159,7 @@ trait ScalaReflection {
160159
valueDataType, valueContainsNull = valueNullable), nullable = true)
161160
case t if t <:< typeOf[String] => Schema(StringType, nullable = true)
162161
case t if t <:< typeOf[Timestamp] => Schema(TimestampType, nullable = true)
163-
case t if t <:< typeOf[java.sql.Date] => Schema(DateType, nullable = true)
162+
case t if t <:< typeOf[Date] => Schema(DateType, nullable = true)
164163
case t if t <:< typeOf[BigDecimal] => Schema(DecimalType.Unlimited, nullable = true)
165164
case t if t <:< typeOf[java.math.BigDecimal] => Schema(DecimalType.Unlimited, nullable = true)
166165
case t if t <:< typeOf[Decimal] => Schema(DecimalType.Unlimited, nullable = true)
@@ -192,7 +191,7 @@ trait ScalaReflection {
192191
case obj: LongType.JvmType => LongType
193192
case obj: FloatType.JvmType => FloatType
194193
case obj: DoubleType.JvmType => DoubleType
195-
case obj: java.sql.Date => DateType
194+
case obj: DateType.JvmType => DateType
196195
case obj: java.math.BigDecimal => DecimalType.Unlimited
197196
case obj: Decimal => DecimalType.Unlimited
198197
case obj: TimestampType.JvmType => TimestampType

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ class SqlParser extends AbstractSparkSQLParser {
5252
protected val CAST = Keyword("CAST")
5353
protected val COALESCE = Keyword("COALESCE")
5454
protected val COUNT = Keyword("COUNT")
55-
protected val DATE = Keyword("DATE")
5655
protected val DECIMAL = Keyword("DECIMAL")
5756
protected val DESC = Keyword("DESC")
5857
protected val DISTINCT = Keyword("DISTINCT")
@@ -384,7 +383,6 @@ class SqlParser extends AbstractSparkSQLParser {
384383
| DOUBLE ^^^ DoubleType
385384
| fixedDecimalType
386385
| DECIMAL ^^^ DecimalType.Unlimited
387-
| DATE ^^^ DateType
388386
)
389387

390388
protected lazy val fixedDecimalType: Parser[DataType] =

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala

Lines changed: 33 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
113113
// UDFToString
114114
private[this] def castToString(from: DataType): Any => Any = from match {
115115
case BinaryType => buildCast[Array[Byte]](_, new String(_, "UTF-8"))
116-
case DateType => buildCast[Int](_, d => DateUtils.toString(d))
116+
case DateType => buildCast[Date](_, dateToString)
117117
case TimestampType => buildCast[Timestamp](_, timestampToString)
118118
case _ => buildCast[Any](_, _.toString)
119119
}
@@ -131,7 +131,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
131131
buildCast[Timestamp](_, t => t.getTime() != 0 || t.getNanos() != 0)
132132
case DateType =>
133133
// Hive would return null when cast from date to boolean
134-
buildCast[Int](_, d => null)
134+
buildCast[Date](_, d => null)
135135
case LongType =>
136136
buildCast[Long](_, _ != 0)
137137
case IntegerType =>
@@ -171,7 +171,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
171171
case ByteType =>
172172
buildCast[Byte](_, b => new Timestamp(b))
173173
case DateType =>
174-
buildCast[Int](_, d => new Timestamp(DateUtils.toJavaDate(d).getTime))
174+
buildCast[Date](_, d => new Timestamp(d.getTime))
175175
// TimestampWritable.decimalToTimestamp
176176
case DecimalType() =>
177177
buildCast[Decimal](_, d => decimalToTimestamp(d))
@@ -224,24 +224,37 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
224224
}
225225
}
226226

227+
// Converts Timestamp to string according to Hive TimestampWritable convention
228+
private[this] def timestampToDateString(ts: Timestamp): String = {
229+
Cast.threadLocalDateFormat.get.format(ts)
230+
}
231+
227232
// DateConverter
228233
private[this] def castToDate(from: DataType): Any => Any = from match {
229234
case StringType =>
230235
buildCast[String](_, s =>
231-
try DateUtils.fromJavaDate(Date.valueOf(s))
232-
catch { case _: java.lang.IllegalArgumentException => null }
233-
)
236+
try Date.valueOf(s) catch { case _: java.lang.IllegalArgumentException => null })
234237
case TimestampType =>
235238
// throw valid precision more than seconds, according to Hive.
236239
// Timestamp.nanos is in 0 to 999,999,999, no more than a second.
237-
buildCast[Timestamp](_, t => DateUtils.millisToDays(t.getTime))
240+
buildCast[Timestamp](_, t => new Date(Math.floor(t.getTime / 1000.0).toLong * 1000))
238241
// Hive throws this exception as a Semantic Exception
239-
// It is never possible to compare result when hive return with exception,
240-
// so we can return null
242+
// It is never possible to compare result when hive return with exception, so we can return null
241243
// NULL is more reasonable here, since the query itself obeys the grammar.
242244
case _ => _ => null
243245
}
244246

247+
// Date cannot be cast to long, according to hive
248+
private[this] def dateToLong(d: Date) = null
249+
250+
// Date cannot be cast to double, according to hive
251+
private[this] def dateToDouble(d: Date) = null
252+
253+
// Converts Date to string according to Hive DateWritable convention
254+
private[this] def dateToString(d: Date): String = {
255+
Cast.threadLocalDateFormat.get.format(d)
256+
}
257+
245258
// LongConverter
246259
private[this] def castToLong(from: DataType): Any => Any = from match {
247260
case StringType =>
@@ -251,7 +264,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
251264
case BooleanType =>
252265
buildCast[Boolean](_, b => if (b) 1L else 0L)
253266
case DateType =>
254-
buildCast[Int](_, d => null)
267+
buildCast[Date](_, d => dateToLong(d))
255268
case TimestampType =>
256269
buildCast[Timestamp](_, t => timestampToLong(t))
257270
case x: NumericType =>
@@ -267,7 +280,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
267280
case BooleanType =>
268281
buildCast[Boolean](_, b => if (b) 1 else 0)
269282
case DateType =>
270-
buildCast[Int](_, d => null)
283+
buildCast[Date](_, d => dateToLong(d))
271284
case TimestampType =>
272285
buildCast[Timestamp](_, t => timestampToLong(t).toInt)
273286
case x: NumericType =>
@@ -283,7 +296,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
283296
case BooleanType =>
284297
buildCast[Boolean](_, b => if (b) 1.toShort else 0.toShort)
285298
case DateType =>
286-
buildCast[Int](_, d => null)
299+
buildCast[Date](_, d => dateToLong(d))
287300
case TimestampType =>
288301
buildCast[Timestamp](_, t => timestampToLong(t).toShort)
289302
case x: NumericType =>
@@ -299,7 +312,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
299312
case BooleanType =>
300313
buildCast[Boolean](_, b => if (b) 1.toByte else 0.toByte)
301314
case DateType =>
302-
buildCast[Int](_, d => null)
315+
buildCast[Date](_, d => dateToLong(d))
303316
case TimestampType =>
304317
buildCast[Timestamp](_, t => timestampToLong(t).toByte)
305318
case x: NumericType =>
@@ -329,7 +342,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
329342
case BooleanType =>
330343
buildCast[Boolean](_, b => changePrecision(if (b) Decimal(1) else Decimal(0), target))
331344
case DateType =>
332-
buildCast[Int](_, d => null) // date can't cast to decimal in Hive
345+
buildCast[Date](_, d => null) // date can't cast to decimal in Hive
333346
case TimestampType =>
334347
// Note that we lose precision here.
335348
buildCast[Timestamp](_, t => changePrecision(Decimal(timestampToDouble(t)), target))
@@ -354,7 +367,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
354367
case BooleanType =>
355368
buildCast[Boolean](_, b => if (b) 1d else 0d)
356369
case DateType =>
357-
buildCast[Int](_, d => null)
370+
buildCast[Date](_, d => dateToDouble(d))
358371
case TimestampType =>
359372
buildCast[Timestamp](_, t => timestampToDouble(t))
360373
case x: NumericType =>
@@ -370,7 +383,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
370383
case BooleanType =>
371384
buildCast[Boolean](_, b => if (b) 1f else 0f)
372385
case DateType =>
373-
buildCast[Int](_, d => null)
386+
buildCast[Date](_, d => dateToDouble(d))
374387
case TimestampType =>
375388
buildCast[Timestamp](_, t => timestampToDouble(t).toFloat)
376389
case x: NumericType =>
@@ -429,16 +442,16 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
429442

430443
object Cast {
431444
// `SimpleDateFormat` is not thread-safe.
432-
private[sql] val threadLocalTimestampFormat = new ThreadLocal[DateFormat] {
445+
private[sql] val threadLocalDateFormat = new ThreadLocal[DateFormat] {
433446
override def initialValue() = {
434-
new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
447+
new SimpleDateFormat("yyyy-MM-dd")
435448
}
436449
}
437450

438451
// `SimpleDateFormat` is not thread-safe.
439-
private[sql] val threadLocalDateFormat = new ThreadLocal[DateFormat] {
452+
private[sql] val threadLocalTimestampFormat = new ThreadLocal[DateFormat] {
440453
override def initialValue() = {
441-
new SimpleDateFormat("yyyy-MM-dd")
454+
new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
442455
}
443456
}
444457
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -246,9 +246,6 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin
246246
new String(${eval.primitiveTerm}.asInstanceOf[Array[Byte]])
247247
""".children
248248

249-
case Cast(child @ DateType(), StringType) =>
250-
child.castOrNull(c => q"org.apache.spark.sql.types.DateUtils.toString($c)", StringType)
251-
252249
case Cast(child @ NumericType(), IntegerType) =>
253250
child.castOrNull(c => q"$c.toInt", IntegerType)
254251

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ object Literal {
3535
case d: java.math.BigDecimal => Literal(Decimal(d), DecimalType.Unlimited)
3636
case d: Decimal => Literal(d, DecimalType.Unlimited)
3737
case t: Timestamp => Literal(t, TimestampType)
38-
case d: Date => Literal(DateUtils.fromJavaDate(d), DateType)
38+
case d: Date => Literal(d, DateType)
3939
case a: Array[Byte] => Literal(a, BinaryType)
4040
case null => Literal(null, NullType)
4141
}

sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateUtils.scala

Lines changed: 0 additions & 60 deletions
This file was deleted.

sql/catalyst/src/main/scala/org/apache/spark/sql/types/dataTypes.scala

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
package org.apache.spark.sql.types
1919

20-
import java.sql.Timestamp
20+
import java.sql.{Date, Timestamp}
2121

2222
import scala.math.Numeric.{FloatAsIfIntegral, DoubleAsIfIntegral}
2323
import scala.reflect.ClassTag
@@ -387,16 +387,18 @@ case object TimestampType extends NativeType {
387387
*/
388388
@DeveloperApi
389389
case object DateType extends NativeType {
390-
private[sql] type JvmType = Int
390+
private[sql] type JvmType = Date
391391

392392
@transient private[sql] lazy val tag = ScalaReflectionLock.synchronized { typeTag[JvmType] }
393393

394-
private[sql] val ordering = implicitly[Ordering[JvmType]]
394+
private[sql] val ordering = new Ordering[JvmType] {
395+
def compare(x: Date, y: Date) = x.compareTo(y)
396+
}
395397

396398
/**
397-
* The default size of a value of the DateType is 4 bytes.
399+
* The default size of a value of the DateType is 8 bytes.
398400
*/
399-
override def defaultSize: Int = 4
401+
override def defaultSize: Int = 8
400402
}
401403

402404

0 commit comments

Comments
 (0)