[SPARK-2650][SQL] Build column buffers in smaller batches #1880
In a nutshell: instead of draining an entire partition into one set of column buffers, `InMemoryRelation` now materializes the cached RDD as one `Array[ByteBuffer]` per batch of `batchSize` rows, and `InMemoryColumnarTableScan` iterates across those batches, rebuilding its column accessors whenever the current batch is exhausted.

Changes from all commits:
```diff
@@ -28,13 +28,14 @@ import org.apache.spark.sql.Row
 import org.apache.spark.SparkConf
 
 object InMemoryRelation {
-  def apply(useCompression: Boolean, child: SparkPlan): InMemoryRelation =
-    new InMemoryRelation(child.output, useCompression, child)()
+  def apply(useCompression: Boolean, batchSize: Int, child: SparkPlan): InMemoryRelation =
+    new InMemoryRelation(child.output, useCompression, batchSize, child)()
 }
 
 private[sql] case class InMemoryRelation(
     output: Seq[Attribute],
     useCompression: Boolean,
+    batchSize: Int,
     child: SparkPlan)
   (private var _cachedColumnBuffers: RDD[Array[ByteBuffer]] = null)
   extends LogicalPlan with MultiInstanceRelation {
```
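So far the only signature change is threading `batchSize` through the `apply` helper and the case-class constructor. A hypothetical call site, just to show the new shape (the value `1000` and the name `plan` are illustrative, not taken from this PR):

```scala
// Hypothetical caller of the new apply(): `plan` stands in for an
// already-built SparkPlan; 1000 is an illustrative batch size, not a
// value this PR prescribes.
val relation: InMemoryRelation =
  InMemoryRelation(useCompression = true, batchSize = 1000, child = plan)
```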
```diff
@@ -43,22 +44,31 @@ private[sql] case class InMemoryRelation(
   // As in Spark, the actual work of caching is lazy.
   if (_cachedColumnBuffers == null) {
     val output = child.output
-    val cached = child.execute().mapPartitions { iterator =>
-      val columnBuilders = output.map { attribute =>
-        ColumnBuilder(ColumnType(attribute.dataType).typeId, 0, attribute.name, useCompression)
-      }.toArray
-
-      var row: Row = null
-      while (iterator.hasNext) {
-        row = iterator.next()
-        var i = 0
-        while (i < row.length) {
-          columnBuilders(i).appendFrom(row, i)
-          i += 1
-        }
-      }
-
-      Iterator.single(columnBuilders.map(_.build()))
+    val cached = child.execute().mapPartitions { baseIterator =>
+      new Iterator[Array[ByteBuffer]] {
+        def next() = {
+          val columnBuilders = output.map { attribute =>
+            ColumnBuilder(ColumnType(attribute.dataType).typeId, 0, attribute.name, useCompression)
+          }.toArray
+
+          var row: Row = null
+          var rowCount = 0
+
+          while (baseIterator.hasNext && rowCount < batchSize) {
+            row = baseIterator.next()
+            var i = 0
+            while (i < row.length) {
+              columnBuilders(i).appendFrom(row, i)
+              i += 1
+            }
+            rowCount += 1
+          }
+
+          columnBuilders.map(_.build())
+        }
+
+        def hasNext = baseIterator.hasNext
+      }
     }.cache()
 
     cached.setName(child.toString)
```
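This hunk is the heart of the change: rather than draining the whole partition into one set of `ColumnBuilder`s and returning a single element, the partition is now exposed as an iterator whose `next()` drains at most `batchSize` rows. Stripped of the columnar machinery, the pattern looks like the following self-contained sketch (`batched` and its names are illustrative, not from the patch; it is `Iterator.grouped` in spirit, written out to mirror the diff):

```scala
// Generic form of the batching pattern above: each next() call drains at
// most `batchSize` elements from the underlying iterator and packages
// them as one batch.
def batched[A](rows: Iterator[A], batchSize: Int): Iterator[Seq[A]] =
  new Iterator[Seq[A]] {
    def hasNext: Boolean = rows.hasNext
    def next(): Seq[A] = {
      val batch = Seq.newBuilder[A]
      var rowCount = 0
      while (rows.hasNext && rowCount < batchSize) {
        batch += rows.next()
        rowCount += 1
      }
      batch.result()
    }
  }

// batched(Iterator.range(0, 10), 4).toList
//   == List(Seq(0, 1, 2, 3), Seq(4, 5, 6, 7), Seq(8, 9))
```

Because each element of the cached RDD now covers only `batchSize` rows, the transient memory needed while building (and the size of each cached element) is bounded by the batch size rather than by the partition size.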
```diff
@@ -74,6 +84,7 @@ private[sql] case class InMemoryRelation(
     new InMemoryRelation(
       output.map(_.newInstance),
       useCompression,
+      batchSize,
       child)(
       _cachedColumnBuffers).asInstanceOf[this.type]
   }
```
```diff
@@ -90,22 +101,31 @@ private[sql] case class InMemoryColumnarTableScan(
 
   override def execute() = {
     relation.cachedColumnBuffers.mapPartitions { iterator =>
-      val columnBuffers = iterator.next()
-      assert(!iterator.hasNext)
+      // Find the ordinals of the requested columns.  If none are requested, use the first.
+      val requestedColumns =
+        if (attributes.isEmpty) {
+          Seq(0)
+        } else {
+          attributes.map(a => relation.output.indexWhere(_.exprId == a.exprId))
+        }
 
       new Iterator[Row] {
-        // Find the ordinals of the requested columns.  If none are requested, use the first.
-        val requestedColumns =
-          if (attributes.isEmpty) {
-            Seq(0)
-          } else {
-            attributes.map(a => relation.output.indexWhere(_.exprId == a.exprId))
-          }
-
-        val columnAccessors = requestedColumns.map(columnBuffers(_)).map(ColumnAccessor(_))
-        val nextRow = new GenericMutableRow(columnAccessors.length)
+        private[this] var columnBuffers: Array[ByteBuffer] = null
+        private[this] var columnAccessors: Seq[ColumnAccessor] = null
+        nextBatch()
+
+        private[this] val nextRow = new GenericMutableRow(columnAccessors.length)
+
+        def nextBatch() = {
+          columnBuffers = iterator.next()
+          columnAccessors = requestedColumns.map(columnBuffers(_)).map(ColumnAccessor(_))
+        }
 
         override def next() = {
+          if (!columnAccessors.head.hasNext) {
+            nextBatch()
+          }
+
           var i = 0
           while (i < nextRow.length) {
             columnAccessors(i).extractTo(nextRow, i)
```

Review thread on the added `Seq(0)` line:

Reviewer: Maybe we can use the narrowest one instead of the 1st one by checking default sizes of columns:

```scala
val narrowest = relation.output.indices.minBy { i =>
  ColumnType(relation.output(i).dataType).defaultSize
}
Seq(narrowest)
```

Reply: Yeah, that would be better. Really though I think we should use statistics from #1883 to skip decoding entirely.

Review thread on the `nextBatch()` call:

Reviewer: @marmbrus @liancheng It's great to see we will support the smaller batch processing in …

liancheng: Maybe I don't get it correctly, but do you mean we should try to reuse batch buffers rather than always allocate new ones for a new batch? I like the idea, and it can surely make the column buffer building process more memory efficient. But currently due to the way …

Reviewer: Yes, @liancheng, you're right. Sorry, I didn't make it clearer, I will create another PR for this. :-)
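For what the truncated exchange above seems to be driving at, "reusing batch buffers" would mean allocating the builders once per partition and clearing them between batches instead of rebuilding them inside every `next()`. A generic sketch of that idea (entirely hypothetical: `ResettableBuilder` and `reset()` do not exist in the Spark code shown here, which is presumably why a separate PR was promised):

```scala
// Hypothetical sketch of the buffer-reuse idea from the thread above:
// one builder per partition, cleared between batches.
trait ResettableBuilder[A, Out] {
  def append(a: A): Unit
  def result(): Out
  def reset(): Unit // clear contents but keep the allocated backing storage
}

def batchedWithReuse[A, Out](
    rows: Iterator[A],
    batchSize: Int,
    builder: ResettableBuilder[A, Out]): Iterator[Out] =
  new Iterator[Out] {
    def hasNext = rows.hasNext
    def next() = {
      builder.reset() // reuse the backing buffer instead of reallocating
      var rowCount = 0
      while (rows.hasNext && rowCount < batchSize) {
        builder.append(rows.next())
        rowCount += 1
      }
      builder.result()
    }
  }
```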
```diff
@@ -114,7 +134,7 @@ private[sql] case class InMemoryColumnarTableScan(
           nextRow
         }
 
-        override def hasNext = columnAccessors.head.hasNext
+        override def hasNext = columnAccessors.head.hasNext || iterator.hasNext
       }
     }
   }
```
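On the read side, `hasNext` must now look past the current batch to the underlying batch iterator, and `next()` lazily pulls in the next batch once the current accessors are exhausted. The same pattern in isolation (an illustrative sketch, not the patch's code; like the scan above, it assumes every batch is non-empty):

```scala
// Generic form of the multi-batch read pattern: flatten an iterator of
// batches back into elements, advancing to the next batch lazily.
// Assumes non-empty batches, as the scan in the diff does.
def unbatched[A](batches: Iterator[Seq[A]]): Iterator[A] =
  new Iterator[A] {
    private[this] var current: Iterator[A] = Iterator.empty
    def hasNext: Boolean = current.hasNext || batches.hasNext
    def next(): A = {
      if (!current.hasNext) {
        current = batches.next().iterator // mirrors nextBatch() in the diff
      }
      current.next()
    }
  }

// Round-trip check: unbatched(batched(Iterator.range(0, 10), 4)).toList
//   == (0 until 10).toList
```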
Review comment (presumably on the `ColumnBuilder(ColumnType(attribute.dataType).typeId, 0, attribute.name, useCompression)` line, where the second argument is the initial buffer size):

Reviewer: A more precise initial buffer size can be used here: …