@@ -38,19 +38,11 @@ private[sql] object JsonRDD extends Logging {
38
38
json : RDD [String ],
39
39
schema : StructType ,
40
40
columnNameOfCorruptRecords : String ): RDD [Row ] = {
41
- // Reuse the mutable row for each record, however we still need to
42
- // create a new row for every nested struct type in each record
43
- val mutableRow = new SpecificMutableRow (schema.fields.map(_.dataType))
44
- parseJson(json, columnNameOfCorruptRecords).mapPartitions( iter => {
45
- iter.map { parsed =>
46
- schema.fields.zipWithIndex.foreach {
47
- case (StructField (name, dataType, _, _), i) =>
48
- mutableRow.update(i, parsed.get(name).flatMap(v => Option (v)).map(
49
- enforceCorrectType(_, dataType)).orNull)
50
- }
51
- mutableRow : Row
52
- }
53
- })
41
+ // Reuse the mutable row for each record and all innner nested structures
42
+ parseJson(json, columnNameOfCorruptRecords).mapPartitions {
43
+ val row = new GenericMutableRow (schema.fields.length)
44
+ iter => iter.map(parsed => asRow(parsed, schema, row))
45
+ }
54
46
}
55
47
56
48
private [sql] def inferSchema (
@@ -413,7 +405,7 @@ private[sql] object JsonRDD extends Logging {
413
405
}
414
406
}
415
407
416
- private [json] def enforceCorrectType (value : Any , desiredType : DataType ): Any = {
408
+ private [json] def enforceCorrectType (value : Any , desiredType : DataType , slot : Any = null ): Any = {
417
409
if (value == null ) {
418
410
null
419
411
} else {
@@ -428,20 +420,29 @@ private[sql] object JsonRDD extends Logging {
428
420
case NullType => null
429
421
case ArrayType (elementType, _) =>
430
422
value.asInstanceOf [Seq [Any ]].map(enforceCorrectType(_, elementType))
431
- case struct : StructType => asRow(value.asInstanceOf [Map [String , Any ]], struct)
423
+ case struct : StructType =>
424
+ asRow(value.asInstanceOf [Map [String , Any ]], struct, slot.asInstanceOf [GenericMutableRow ])
432
425
case DateType => toDate(value)
433
426
case TimestampType => toTimestamp(value)
434
427
}
435
428
}
436
429
}
437
430
438
- private def asRow (json : Map [String ,Any ], schema : StructType ): Row = {
439
- // TODO: Reuse the row instead of creating a new one for every record.
440
- val row = new GenericMutableRow (schema.fields.length)
441
- schema.fields.zipWithIndex.foreach {
442
- case (StructField (name, dataType, _, _), i) =>
443
- row.update(i, json.get(name).flatMap(v => Option (v)).map(
444
- enforceCorrectType(_, dataType)).orNull)
431
+ private def asRow (
432
+ json : Map [String ,Any ],
433
+ schema : StructType ,
434
+ mutable : GenericMutableRow = null ): Row = {
435
+ val row = if (mutable == null ) {
436
+ new GenericMutableRow (schema.fields.length)
437
+ } else {
438
+ mutable
439
+ }
440
+
441
+ for (i <- 0 until schema.fields.length) {
442
+ val fieldName = schema.fields(i).name
443
+ val fieldType = schema.fields(i).dataType
444
+ row.update(i, json.get(fieldName).flatMap(v => Option (v)).map(
445
+ enforceCorrectType(_, fieldType, row(i))).orNull)
445
446
}
446
447
447
448
row
0 commit comments