diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
index d9d7a3fea3963..d666a5cbf1a9f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
@@ -180,7 +180,10 @@ private[sql] object JsonRDD extends Logging {
   }
 
   private def typeOfPrimitiveValue: PartialFunction[Any, DataType] = {
-    ScalaReflection.typeOfObject orElse {
+    def autoDetect: PartialFunction[Any, DataType] = {
+      case value: String => DataTypeConversions.guessTypeFromString(value)
+    }
+    autoDetect orElse ScalaReflection.typeOfObject orElse {
       // Since we do not have a data type backed by BigInteger,
       // when we see a Java BigInteger, we use DecimalType.
       case value: java.math.BigInteger => DecimalType.Unlimited
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/types/util/DataTypeConversions.scala b/sql/core/src/main/scala/org/apache/spark/sql/types/util/DataTypeConversions.scala
index d4258156f18f6..9e69552eab701 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/types/util/DataTypeConversions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/types/util/DataTypeConversions.scala
@@ -131,6 +131,25 @@ protected[sql] object DataTypeConversions {
     StructType(structType.getFields.map(asScalaStructField))
   }
 
+  /**
+   * Infers the SQL type of a string by attempting to parse it with `stringToTime`.
+   *
+   * @param s the raw string value
+   * @return DateType when the parse yields a java.sql.Date, TimestampType for any other
+   *         successfully parsed value, and StringType when parsing fails
+   */
+  def guessTypeFromString(s: String): DataType = {
+    try {
+      stringToTime(s) match {
+        case _: java.sql.Date => DateType
+        case _ => TimestampType
+      }
+    } catch {
+      // Only recover from ordinary parse failures; fatal JVM errors must propagate.
+      case scala.util.control.NonFatal(_) => StringType
+    }
+  }
+
   def stringToTime(s: String): java.util.Date = {
     if (!s.contains('T')) {
       // JDBC escape string
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
index f8ca2c773d9ab..648499931646e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
@@ -202,7 +202,8 @@ class JsonSuite extends QueryTest {
       StructField("integer", IntegerType, true) ::
       StructField("long", LongType, true) ::
       StructField("null", StringType, true) ::
-      StructField("string", StringType, true) :: Nil)
+      StructField("string", StringType, true) ::
+      StructField("time", TimestampType, true) :: Nil)
 
     assert(expectedSchema === jsonSchemaRDD.schema)
 
@@ -216,7 +217,8 @@ class JsonSuite extends QueryTest {
       10,
       21474836470L,
       null,
-      "this is a simple string.") :: Nil
+      "this is a simple string.",
+      java.sql.Timestamp.valueOf("2014-11-11 12:00:00.0")) :: Nil
     )
   }
 
@@ -540,7 +542,8 @@ class JsonSuite extends QueryTest {
       StructField("integer", IntegerType, true) ::
       StructField("long", LongType, true) ::
       StructField("null", StringType, true) ::
-      StructField("string", StringType, true) :: Nil)
+      StructField("string", StringType, true) ::
+      StructField("time", TimestampType, true) :: Nil)
 
     assert(expectedSchema === jsonSchemaRDD.schema)
 
@@ -554,7 +557,8 @@ class JsonSuite extends QueryTest {
       10,
       21474836470L,
       null,
-      "this is a simple string.") :: Nil
+      "this is a simple string.",
+      java.sql.Timestamp.valueOf("2014-11-11 12:00:00.0")) :: Nil
     )
   }
 
@@ -580,7 +584,8 @@ class JsonSuite extends QueryTest {
       10,
       21474836470L,
       null,
-      "this is a simple string.") :: Nil
+      "this is a simple string.",
+      java.sql.Timestamp.valueOf("2014-11-11 12:00:00.0")) :: Nil
     )
   }
 
@@ -596,7 +601,8 @@ class JsonSuite extends QueryTest {
       StructField("integer", IntegerType, true) ::
       StructField("long", LongType, true) ::
       StructField("null", StringType, true) ::
-      StructField("string", StringType, true) :: Nil)
+      StructField("string", StringType, true) ::
+      StructField("time", TimestampType, true) :: Nil)
 
     val jsonSchemaRDD1 = jsonFile(path, schema)
 
@@ -612,7 +618,8 @@ class JsonSuite extends QueryTest {
       10,
       21474836470L,
       null,
-      "this is a simple string.") :: Nil
+      "this is a simple string.",
+      java.sql.Timestamp.valueOf("2014-11-11 12:00:00.0")) :: Nil
     )
 
     val jsonSchemaRDD2 = jsonRDD(primitiveFieldAndType, schema)
@@ -629,7 +636,8 @@ class JsonSuite extends QueryTest {
       10,
       21474836470L,
       null,
-      "this is a simple string.") :: Nil
+      "this is a simple string.",
+      java.sql.Timestamp.valueOf("2014-11-11 12:00:00.0")) :: Nil
     )
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/json/TestJsonData.scala b/sql/core/src/test/scala/org/apache/spark/sql/json/TestJsonData.scala
index e5773a55875bc..2810eab1e01b7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/json/TestJsonData.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/json/TestJsonData.scala
@@ -29,7 +29,8 @@ object TestJsonData {
           "bigInteger":92233720368547758070,
           "double":1.7976931348623157E308,
           "boolean":true,
-          "null":null
+          "null":null,
+          "time":"2014-11-11 12:00:00.0"
       }""" :: Nil)
 
   val primitiveFieldValueTypeConflict =