File tree: 4 files changed, +41 −11 lines
  examples/src/main/python/streaming
  streaming/src/main/scala/org/apache/spark/streaming/api/python

@@ -118,11 +118,9 @@ def add_shuffle_key(split, iterator):
         keyed = PipelinedDStream(self, add_shuffle_key)
         keyed._bypass_serializer = True
         with _JavaStackTrace(self.ctx) as st:
-            #JavaDStream
-            pairDStream = self.ctx._jvm.PairwiseDStream(keyed._jdstream.dstream()).asJavaPairDStream()
             partitioner = self.ctx._jvm.PythonPartitioner(numPartitions,
-                                                          id(partitionFunc))
-            jdstream = pairDStream.partitionBy(partitioner).values()
+                                                          id(partitionFunc))
+            jdstream = self.ctx._jvm.PairwiseDStream(keyed._jdstream.dstream(), partitioner).asJavaDStream()
         dstream = DStream(jdstream, self._ssc, BatchedSerializer(outputSerializer))
         # This is required so that id(partitionFunc) remains unique, even if
         # partitionFunc is a lambda:
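
Net effect of this hunk: instead of materializing a JavaPairDStream in Python and chaining partitionBy/values through per-call Py4J round-trips, the Python side now hands the partitioner straight to the JVM, and the chain runs once per batch inside the new PairwiseDStream (next hunk). Below is a minimal stand-alone sketch of that pattern in plain Scala, using ordinary (Long, Array[Byte]) pairs instead of pickled bytes; the class and names are illustrative, not part of this PR:

import org.apache.spark.Partitioner
import org.apache.spark.SparkContext._ // pair-RDD implicits (needed on Spark < 1.3)
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}
import org.apache.spark.streaming.dstream.DStream

// Illustrative stand-in for PairwiseDStream: each batch is shuffled by key
// with the supplied partitioner, then the keys are dropped again -- the same
// partitionBy(...).values() chain the Python code above used to assemble.
// Like the PR's class, this must live under the org.apache.spark.streaming
// package tree, because prev.ssc is private[streaming].
class RepartitionedValuesDStream(
    prev: DStream[(Long, Array[Byte])],
    partitioner: Partitioner)
  extends DStream[Array[Byte]](prev.ssc) {

  override def dependencies = List(prev)

  override def slideDuration: Duration = prev.slideDuration

  override def compute(validTime: Time): Option[RDD[Array[Byte]]] =
    prev.getOrCompute(validTime).map(_.partitionBy(partitioner).values)
}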
@@ -59,3 +59,30 @@ class PythonDStream[T: ClassTag](
   val asJavaDStream = JavaDStream.fromDStream(this)
 }
 
+
+private class PairwiseDStream(prev: DStream[Array[Byte]], partitioner: Partitioner)
+  extends DStream[Array[Byte]](prev.ssc) {
+  override def dependencies = List(prev)
+
+  override def slideDuration: Duration = prev.slideDuration
+
+  override def compute(validTime: Time): Option[RDD[Array[Byte]]] = {
+    prev.getOrCompute(validTime) match {
+      case Some(rdd) =>
+        val pairwiseRDD = new PairwiseRDD(rdd)
+        /*
+         * This is equivalent to the following Python code:
+         *   with _JavaStackTrace(self.context) as st:
+         *       pairRDD = self.ctx._jvm.PairwiseRDD(keyed._jrdd.rdd()).asJavaPairRDD()
+         *       partitioner = self.ctx._jvm.PythonPartitioner(numPartitions,
+         *                                                     id(partitionFunc))
+         *       jrdd = pairRDD.partitionBy(partitioner).values()
+         *       rdd = RDD(jrdd, self.ctx, BatchedSerializer(outputSerializer))
+         */
+        Some(pairwiseRDD.asJavaPairRDD.partitionBy(partitioner).values().rdd)
+      case None => None
+    }
+  }
+  val asJavaDStream = JavaDStream.fromDStream(this)
+  // val asJavaPairDStream: JavaPairDStream[Long, Array[Byte]] = JavaPairDStream.fromJavaDStream(this)
+}
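
Two things this hunk leaves implicit. First, the imports: assuming the standard Spark 1.x source layout, the file needs roughly the block below (some entries may already be present). Second, visibility: PairwiseRDD is a private top-level class in PythonRDD.scala under org.apache.spark.api.python, so using it from org.apache.spark.streaming.api.python would likely require widening it to private[spark] upstream; that is an inference, not something the diff shows.

// Probable import block for the new class; package paths assume the
// Spark 1.x source layout and are not shown in the diff itself.
import org.apache.spark.Partitioner
import org.apache.spark.api.python.PairwiseRDD
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}
import org.apache.spark.streaming.api.java.JavaDStream
import org.apache.spark.streaming.dstream.DStream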

@@ -1,3 +1,5 @@
+/*
+
 package org.apache.spark.streaming.api.python
 
 import org.apache.spark.Accumulator
@@ -10,11 +12,8 @@ import org.apache.spark.streaming.dstream.DStream
 
 import scala.reflect.ClassTag
 
-/**
- * Created by ken on 7/15/14.
- */
 class PythonTransformedDStream[T: ClassTag](
-    parents: Seq[DStream[T]],
+    parent: DStream[T],
     command: Array[Byte],
     envVars: JMap[String, String],
     pythonIncludes: JList[String],
@@ -30,8 +29,14 @@ class PythonTransformedDStream[T: ClassTag](
 
   //pythonDStream compute
   override def compute(validTime: Time): Option[RDD[Array[Byte]]] = {
-    val parentRDDs = parents.map(_.getOrCompute(validTime).orNull).toSeq
-    Some()
+
+    // val parentRDDs = parents.map(_.getOrCompute(validTime).orNull).toSeq
+    // parents.map(_.getOrCompute(validTime).orNull).to
+    // parent = parents.head.asInstanceOf[RDD]
+    // Some()
   }
-  val asJavaDStream = JavaDStream.fromDStream(this)
+
+  val asJavaDStream = JavaDStream.fromDStream(this)
 }
+
+*/
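
The whole file is parked inside a block comment here, and compute is reduced to commented-out experiments against the old parents: Seq[DStream[T]] signature. For orientation, a hedged sketch of what a single-parent compute might look like once the class is revived, mirroring the getOrCompute pattern PairwiseDStream uses above; the extra constructor fields (preservePartitoning, pythonExec, broadcastVars, accumulator) and the PythonRDD signature follow the Spark 1.0-era API and are assumptions, not code from this PR:

// Hypothetical revival sketch, not part of this diff. PythonRDD already
// extends RDD[Array[Byte]], so its instance can be returned directly.
override def compute(validTime: Time): Option[RDD[Array[Byte]]] = {
  parent.getOrCompute(validTime).map { rdd =>
    // Push the batch RDD through the Python worker; the constructor
    // fields are assumed to mirror PythonDStream's (see lead-in above).
    new PythonRDD(rdd, command, envVars, pythonIncludes,
      preservePartitoning, pythonExec, broadcastVars, accumulator)
  }
}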