Skip to content

Commit d86e437

Browse files
Move update & initialization out of potentially long loop.
1 parent 2245c87 commit d86e437

File tree

1 file changed

+7
-3
lines changed

1 file changed

+7
-3
lines changed

sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -164,13 +164,17 @@ class HadoopTableReader(@transient _tableDesc: TableDesc, @transient sc: HiveCon
164164
hivePartitionRDD.mapPartitions { iter =>
165165
val hconf = broadcastedHiveConf.value.value
166166
val rowWithPartArr = new Array[Object](2)
167+
168+
// The update and deserializer initialization are intentionally
169+
// kept outside of the iter.map loop below to improve performance.
170+
rowWithPartArr.update(1, partValues)
171+
val deserializer = localDeserializer.newInstance()
172+
deserializer.initialize(hconf, partProps)
173+
167174
// Map each tuple to a row object
168175
iter.map { value =>
169-
val deserializer = localDeserializer.newInstance()
170-
deserializer.initialize(hconf, partProps)
171176
val deserializedRow = deserializer.deserialize(value)
172177
rowWithPartArr.update(0, deserializedRow)
173-
rowWithPartArr.update(1, partValues)
174178
rowWithPartArr.asInstanceOf[Object]
175179
}
176180
}

0 commit comments

Comments
 (0)