Skip to content

Commit 9916202

Browse files
committed
change RDD.sliding return type to RDD[Seq[T]]
1 parent 284d991 commit 9916202

File tree

2 files changed

+10
-11
lines changed

2 files changed

+10
-11
lines changed

core/src/main/scala/org/apache/spark/rdd/RDD.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -958,9 +958,9 @@ abstract class RDD[T: ClassTag](
958958
* trigger a Spark job if the parent RDD has more than one partition and the window size is
959959
* greater than 1.
960960
*/
961-
def sliding(windowSize: Int): RDD[Array[T]] = {
961+
def sliding(windowSize: Int): RDD[Seq[T]] = {
962962
if (windowSize == 1) {
963-
this.map(Array(_))
963+
this.map(Seq(_))
964964
} else {
965965
new SlidingRDD[T](this, windowSize)
966966
}

core/src/main/scala/org/apache/spark/rdd/SlidingRDD.scala

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import scala.reflect.ClassTag
2323
import org.apache.spark.{TaskContext, Partition}
2424

2525
private[spark]
26-
class SlidingRDDPartition[T](val idx: Int, val prev: Partition, val tail: Array[T])
26+
class SlidingRDDPartition[T](val idx: Int, val prev: Partition, val tail: Seq[T])
2727
extends Partition with Serializable {
2828
override val index: Int = idx
2929
}
@@ -42,16 +42,15 @@ class SlidingRDDPartition[T](val idx: Int, val prev: Partition, val tail: Array[
4242
*/
4343
private[spark]
4444
class SlidingRDD[T: ClassTag](@transient val parent: RDD[T], val windowSize: Int)
45-
extends RDD[Array[T]](parent) {
45+
extends RDD[Seq[T]](parent) {
4646

4747
require(windowSize > 1, s"Window size must be greater than 1, but got $windowSize.")
4848

49-
override def compute(split: Partition, context: TaskContext): Iterator[Array[T]] = {
49+
override def compute(split: Partition, context: TaskContext): Iterator[Seq[T]] = {
5050
val part = split.asInstanceOf[SlidingRDDPartition[T]]
5151
(firstParent[T].iterator(part.prev, context) ++ part.tail)
5252
.sliding(windowSize)
53-
.map(_.toArray)
54-
.filter(_.size == windowSize)
53+
.withPartial(false)
5554
}
5655

5756
override def getPreferredLocations(split: Partition): Seq[String] =
@@ -63,7 +62,7 @@ class SlidingRDD[T: ClassTag](@transient val parent: RDD[T], val windowSize: Int
6362
if (n == 0) {
6463
Array.empty
6564
} else if (n == 1) {
66-
Array(new SlidingRDDPartition[T](0, parentPartitions(0), Array.empty))
65+
Array(new SlidingRDDPartition[T](0, parentPartitions(0), Seq.empty))
6766
} else {
6867
val n1 = n - 1
6968
val w1 = windowSize - 1
@@ -75,7 +74,7 @@ class SlidingRDD[T: ClassTag](@transient val parent: RDD[T], val windowSize: Int
7574
var partitionIndex = 0
7675
while (i < n1) {
7776
var j = i
78-
val tail = mutable.ArrayBuffer[T]()
77+
val tail = mutable.ListBuffer[T]()
7978
// Keep appending to the current tail until a head of size w1 has been appended.
8079
while (j < n1 && nextHeads(j).size < w1) {
8180
tail ++= nextHeads(j)
@@ -85,14 +84,14 @@ class SlidingRDD[T: ClassTag](@transient val parent: RDD[T], val windowSize: Int
8584
tail ++= nextHeads(j)
8685
j += 1
8786
}
88-
partitions += new SlidingRDDPartition[T](partitionIndex, parentPartitions(i), tail.toArray)
87+
partitions += new SlidingRDDPartition[T](partitionIndex, parentPartitions(i), tail.toSeq)
8988
partitionIndex += 1
9089
// Skip appended heads.
9190
i = j
9291
}
9392
// If the head of last partition has size w1, we also need to add this partition.
9493
if (nextHeads(n1 - 1).size == w1) {
95-
partitions += new SlidingRDDPartition[T](partitionIndex, parentPartitions(n1), Array.empty)
94+
partitions += new SlidingRDDPartition[T](partitionIndex, parentPartitions(n1), Seq.empty)
9695
}
9796
partitions.toArray
9897
}

0 commit comments

Comments
 (0)