@@ -23,7 +23,7 @@ import scala.reflect.ClassTag
23
23
import org .apache .spark .{TaskContext , Partition }
24
24
25
25
private [spark]
26
- class SlidingRDDPartition [T ](val idx : Int , val prev : Partition , val tail : Array [T ])
26
+ class SlidingRDDPartition [T ](val idx : Int , val prev : Partition , val tail : Seq [T ])
27
27
extends Partition with Serializable {
28
28
override val index : Int = idx
29
29
}
@@ -42,16 +42,15 @@ class SlidingRDDPartition[T](val idx: Int, val prev: Partition, val tail: Array[
42
42
*/
43
43
private [spark]
44
44
class SlidingRDD [T : ClassTag ](@ transient val parent : RDD [T ], val windowSize : Int )
45
- extends RDD [Array [T ]](parent) {
45
+ extends RDD [Seq [T ]](parent) {
46
46
47
47
require(windowSize > 1 , s " Window size must be greater than 1, but got $windowSize. " )
48
48
49
- override def compute (split : Partition , context : TaskContext ): Iterator [Array [T ]] = {
49
+ override def compute (split : Partition , context : TaskContext ): Iterator [Seq [T ]] = {
50
50
val part = split.asInstanceOf [SlidingRDDPartition [T ]]
51
51
(firstParent[T ].iterator(part.prev, context) ++ part.tail)
52
52
.sliding(windowSize)
53
- .map(_.toArray)
54
- .filter(_.size == windowSize)
53
+ .withPartial(false )
55
54
}
56
55
57
56
override def getPreferredLocations (split : Partition ): Seq [String ] =
@@ -63,7 +62,7 @@ class SlidingRDD[T: ClassTag](@transient val parent: RDD[T], val windowSize: Int
63
62
if (n == 0 ) {
64
63
Array .empty
65
64
} else if (n == 1 ) {
66
- Array (new SlidingRDDPartition [T ](0 , parentPartitions(0 ), Array .empty))
65
+ Array (new SlidingRDDPartition [T ](0 , parentPartitions(0 ), Seq .empty))
67
66
} else {
68
67
val n1 = n - 1
69
68
val w1 = windowSize - 1
@@ -75,7 +74,7 @@ class SlidingRDD[T: ClassTag](@transient val parent: RDD[T], val windowSize: Int
75
74
var partitionIndex = 0
76
75
while (i < n1) {
77
76
var j = i
78
- val tail = mutable.ArrayBuffer [T ]()
77
+ val tail = mutable.ListBuffer [T ]()
79
78
// Keep appending to the current tail until appended a head of size w1.
80
79
while (j < n1 && nextHeads(j).size < w1) {
81
80
tail ++= nextHeads(j)
@@ -85,14 +84,14 @@ class SlidingRDD[T: ClassTag](@transient val parent: RDD[T], val windowSize: Int
85
84
tail ++= nextHeads(j)
86
85
j += 1
87
86
}
88
- partitions += new SlidingRDDPartition [T ](partitionIndex, parentPartitions(i), tail.toArray )
87
+ partitions += new SlidingRDDPartition [T ](partitionIndex, parentPartitions(i), tail.toSeq )
89
88
partitionIndex += 1
90
89
// Skip appended heads.
91
90
i = j
92
91
}
93
92
// If the head of last partition has size w1, we also need to add this partition.
94
93
if (nextHeads(n1 - 1 ).size == w1) {
95
- partitions += new SlidingRDDPartition [T ](partitionIndex, parentPartitions(n1), Array .empty)
94
+ partitions += new SlidingRDDPartition [T ](partitionIndex, parentPartitions(n1), Seq .empty)
96
95
}
97
96
partitions.toArray
98
97
}
0 commit comments