@@ -94,20 +94,27 @@ case class HiveTableScan(
94
94
(_ : Any , partitionKeys : Array [String ]) => {
95
95
val value = partitionKeys(ordinal)
96
96
val dataType = relation.partitionKeys(ordinal).dataType
97
- castFromString(value, dataType)
97
+ unwrapHiveData( castFromString(value, dataType) )
98
98
}
99
99
} else {
100
100
val ref = objectInspector.getAllStructFieldRefs
101
101
.find(_.getFieldName == a.name)
102
102
.getOrElse(sys.error(s " Can't find attribute $a" ))
103
103
(row : Any , _ : Array [String ]) => {
104
104
val data = objectInspector.getStructFieldData(row, ref)
105
- unwrapData(data, ref.getFieldObjectInspector)
105
+ unwrapHiveData( unwrapData(data, ref.getFieldObjectInspector) )
106
106
}
107
107
}
108
108
}
109
109
}
110
110
111
/**
 * Normalizes a value produced by Hive deserialization into the plain Scala
 * type expected in a Catalyst row.
 *
 * - The literal string "null" (any casing) becomes `null`.
 *   NOTE(review): presumably this is how Hive renders missing partition
 *   values as text — confirm against the partition-key code path.
 * - A [[HiveVarchar]] is unwrapped to its underlying `String`.
 * - A [[HiveDecimal]] is converted to a Scala `BigDecimal`.
 * - Anything else passes through untouched.
 *
 * @param value raw value coming out of Hive
 * @return the unwrapped value, or `null` for Hive's textual NULL
 */
private def unwrapHiveData(value: Any) = value match {
  case s: String if s.toLowerCase == "null" => null
  case vc: HiveVarchar                      => vc.getValue
  case dec: HiveDecimal                     => BigDecimal(dec.bigDecimalValue)
  case anythingElse                         => anythingElse
}
111
118
/**
 * Casts a raw partition-key string to the requested Catalyst data type by
 * evaluating a constant `Cast` expression (`eval(null)`: no input row is
 * needed since the literal carries the value).
 *
 * @param value    the partition-key value as stored in the metastore path
 * @param dataType the Catalyst type declared for this partition column
 * @return the value converted to `dataType`
 */
private def castFromString(value: String, dataType: DataType) =
  Cast(Literal(value), dataType).eval(null)
@@ -143,20 +150,34 @@ case class HiveTableScan(
143
150
}
144
151
145
152
/**
 * Produces the scan's output rows.
 *
 * Works partition-by-partition so that a single [[GenericMutableRow]] can be
 * recycled for every record in a partition instead of allocating one row per
 * record.  Downstream operators that buffer rows must therefore copy them.
 *
 * Input elements come in two shapes, distinguished by peeking at the first
 * element of the partition:
 *  - `Array(deserializedRow, partitionKeys: Array[String])` for partitioned
 *    tables, and
 *  - a bare deserialized row (paired here with an empty key array) otherwise.
 */
def execute() = {
  inputRdd.mapPartitions { partIter =>
    if (partIter.isEmpty) {
      Iterator.empty
    } else {
      // One reusable output row per partition — avoids per-record allocation.
      val outputRow = new GenericMutableRow(attributes.length)
      // Buffer so we can inspect the first element without consuming it.
      val peekable = partIter.buffered

      val rowsWithKeys = peekable.head match {
        case Array(_, _) =>
          // Partitioned input: each element bundles the row with its keys.
          peekable.map { case Array(deserializedRow, partitionKeys: Array[String]) =>
            (deserializedRow, partitionKeys)
          }
        case _ =>
          // Unpartitioned input: supply an empty key array.
          peekable.map(deserializedRow => (deserializedRow, Array.empty[String]))
      }

      rowsWithKeys.map { case (deserializedRow, partitionKeys) =>
        // Fill every column via its extraction function; a while loop keeps
        // this hot path allocation-free.
        var col = 0
        while (col < attributes.length) {
          outputRow(col) = attributeFunctions(col)(deserializedRow, partitionKeys)
          col += 1
        }
        outputRow: Row
      }
    }
  }
}
162
183
0 commit comments