Skip to content

Commit f0eb0a7

Browse files
adrian-wang authored and marmbrus committed
[SPARK-4149][SQL] ISO 8601 support for json date time strings
This implements the feature davies mentioned in #2901 (diff). Author: Daoyuan Wang <[email protected]> Closes #3012 from adrian-wang/iso8601 and squashes the following commits: 50df6e7 [Daoyuan Wang] json data timestamp ISO8601 support (cherry picked from commit a1fc059) Signed-off-by: Michael Armbrust <[email protected]>
1 parent ff071e3 commit f0eb0a7

File tree

3 files changed

+40
-2
lines changed

3 files changed

+40
-2
lines changed

sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package org.apache.spark.sql.json
1919

2020
import org.apache.spark.sql.catalyst.types.decimal.Decimal
21+
import org.apache.spark.sql.types.util.DataTypeConversions
2122

2223
import scala.collection.Map
2324
import scala.collection.convert.Wrappers.{JMapWrapper, JListWrapper}
@@ -378,15 +379,15 @@ private[sql] object JsonRDD extends Logging {
378379
/**
 * Converts a JSON value to a `Date`.
 *
 * Only string input is handled; any other runtime type falls through the match and
 * raises a `MatchError`. The string is parsed by `DataTypeConversions.stringToTime`,
 * and the resulting instant (milliseconds since the epoch) is wrapped in a new `Date`.
 */
private def toDate(value: Any): Date = value match {
  // only support string as date
  case text: java.lang.String =>
    val parsed = DataTypeConversions.stringToTime(text)
    new Date(parsed.getTime)
}
384385

385386
/**
 * Converts a JSON value to a `Timestamp`.
 *
 * - `Integer` / `Long` values are passed to `new Timestamp(long)`, i.e. treated as
 *   milliseconds since the epoch.
 * - `String` values are parsed by `DataTypeConversions.stringToTime`.
 *
 * Any other runtime type falls through the match and raises a `MatchError`.
 */
private def toTimestamp(value: Any): Timestamp = {
  value match {
    case value: java.lang.Integer => new Timestamp(value.longValue())
    case value: java.lang.Long => new Timestamp(value)
    case value: java.lang.String =>
      DataTypeConversions.stringToTime(value) match {
        // JDBC-format strings already come back as a Timestamp; return it untouched so
        // that sub-millisecond (nanosecond) precision is not truncated by getTime.
        case ts: Timestamp => ts
        case other => new Timestamp(other.getTime)
      }
  }
}
392393

sql/core/src/main/scala/org/apache/spark/sql/types/util/DataTypeConversions.scala

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
package org.apache.spark.sql.types.util
1919

20+
import java.text.SimpleDateFormat
21+
2022
import scala.collection.JavaConverters._
2123

2224
import org.apache.spark.sql._
@@ -129,6 +131,34 @@ protected[sql] object DataTypeConversions {
129131
StructType(structType.getFields.map(asScalaStructField))
130132
}
131133

134+
/**
 * Parses a date/time string into a `java.util.Date`.
 *
 * Accepted forms:
 *  - JDBC escape strings (no `'T'` in the input): `yyyy-[m]m-[d]d hh:mm:ss[.f...]` is
 *    handled by `java.sql.Timestamp.valueOf`, `yyyy-[m]m-[d]d` by `java.sql.Date.valueOf`.
 *  - ISO 8601 date-times `yyyy-MM-dd'T'HH:mm:ss[.fraction][zone]`, where `zone` is `Z`,
 *    a numeric offset (`+hh:mm`, `+hhmm` or `+hh`), or a `GMT...` designator such as
 *    `GMT-01:00`. Without a zone the value is interpreted in the JVM default time zone,
 *    like the JDBC forms.
 *
 * The fractional part is read as a decimal fraction of a second (`.5` is 500 ms) and is
 * truncated to millisecond precision.
 *
 * @param s the text to parse
 * @return the parsed instant; a `java.sql.Timestamp` or `java.sql.Date` for JDBC input
 * @throws java.text.ParseException if an ISO 8601 style input is malformed
 * @throws IllegalArgumentException if a JDBC style input is malformed
 */
def stringToTime(s: String): java.util.Date = {
  if (!s.contains('T')) {
    // JDBC escape string
    if (s.contains(' ')) {
      java.sql.Timestamp.valueOf(s)
    } else {
      java.sql.Date.valueOf(s)
    }
  } else {
    val tPos = s.indexOf('T')
    val gmtPos = s.indexOf("GMT")
    // After the 'T' separator only a zone offset can contain a sign character.
    val signPos = math.max(s.lastIndexOf('+'), s.lastIndexOf('-'))

    // Split the input into the local date-time text and an optional zone in the
    // "GMT+hh:mm" form, which SimpleDateFormat's 'z' pattern letter can parse.
    val (local, zone): (String, Option[String]) =
      if (s.endsWith("Z")) {
        // this is zero timezone of ISO8601
        (s.substring(0, s.length - 1), Some("GMT+00:00"))
      } else if (gmtPos != -1) {
        // ISO8601 with GMT insert
        (s.substring(0, gmtPos), Some(s.substring(gmtPos)))
      } else if (signPos > tPos) {
        // timezone with ISO8601: normalize +hh, +hhmm and +hh:mm to +hh:mm
        val offset = s.substring(signPos)
        val digits = offset.substring(1).replace(":", "")
        val hhmm = if (digits.length == 2) digits + "00" else digits
        val normalized =
          if (hhmm.length == 4) {
            "GMT" + offset.charAt(0) + hhmm.substring(0, 2) + ":" + hhmm.substring(2)
          } else {
            // Unexpected shape: pass it through and let the parser reject it.
            "GMT" + offset
          }
        (s.substring(0, signPos), Some(normalized))
      } else {
        (s, None)
      }

    // Handle fractional seconds by hand: SimpleDateFormat's 'S' field reads the digits as
    // an integer millisecond count, so ".5" would become 5 ms instead of 500 ms.
    val dotPos = local.indexOf('.', tPos)
    val (base, millis): (String, Int) =
      if (dotPos == -1) {
        (local, 0)
      } else {
        val frac = local.substring(dotPos + 1)
        if (frac.isEmpty || !frac.forall(c => c >= '0' && c <= '9')) {
          throw new java.text.ParseException("Unparseable date: \"" + s + "\"", dotPos)
        }
        (local.substring(0, dotPos), (frac + "00").substring(0, 3).toInt)
      }

    // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
    val parsed = zone match {
      case Some(z) => new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssz").parse(base + z)
      case None => new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").parse(base)
    }
    new java.util.Date(parsed.getTime + millis)
  }
}
161+
132162
/** Converts Java objects to catalyst rows / types */
133163
def convertJavaToCatalyst(a: Any, dataType: DataType): Any = (a, dataType) match {
134164
case (obj, udt: UserDefinedType[_]) => ScalaReflection.convertToCatalyst(obj, udt) // Scala type

sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,13 @@ class JsonSuite extends QueryTest {
6666

6767
val strDate = "2014-10-15"
6868
checkTypePromotion(Date.valueOf(strDate), enforceCorrectType(strDate, DateType))
69+
70+
val ISO8601Time1 = "1970-01-01T01:00:01.0Z"
71+
checkTypePromotion(new Timestamp(3601000), enforceCorrectType(ISO8601Time1, TimestampType))
72+
checkTypePromotion(new Date(3601000), enforceCorrectType(ISO8601Time1, DateType))
73+
val ISO8601Time2 = "1970-01-01T02:00:01-01:00"
74+
checkTypePromotion(new Timestamp(10801000), enforceCorrectType(ISO8601Time2, TimestampType))
75+
checkTypePromotion(new Date(10801000), enforceCorrectType(ISO8601Time2, DateType))
6976
}
7077

7178
test("Get compatible type") {

0 commit comments

Comments
 (0)