Skip to content

Commit bceab8a

Browse files
committed
Commented DStream corner cases for serializability checking.
1 parent 64d04d2 commit bceab8a

File tree

1 file changed

+15
-0
lines changed
  • streaming/src/main/scala/org/apache/spark/streaming/dstream

1 file changed

+15
-0
lines changed

streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,9 @@ abstract class DStream[T: ClassTag] (
532532
* 'this' DStream will be registered as an output stream and therefore materialized.
533533
*/
534534
def foreachRDD(foreachFunc: (RDD[T], Time) => Unit) {
535+
// because the DStream is reachable from the outer object here, and because
536+
// DStreams can't be serialized with closures, we can't proactively check
537+
// it for serializability and so we pass the optional false to SparkContext.clean
535538
new ForEachDStream(this, context.sparkContext.clean(foreachFunc, false)).register()
536539
}
537540

@@ -540,6 +543,9 @@ abstract class DStream[T: ClassTag] (
540543
* on each RDD of 'this' DStream.
541544
*/
542545
def transform[U: ClassTag](transformFunc: RDD[T] => RDD[U]): DStream[U] = {
546+
// because the DStream is reachable from the outer object here, and because
547+
// DStreams can't be serialized with closures, we can't proactively check
548+
// it for serializability and so we pass the optional false to SparkContext.clean
543549
transform((r: RDD[T], t: Time) => context.sparkContext.clean(transformFunc(r), false))
544550
}
545551

@@ -548,6 +554,9 @@ abstract class DStream[T: ClassTag] (
548554
* on each RDD of 'this' DStream.
549555
*/
550556
def transform[U: ClassTag](transformFunc: (RDD[T], Time) => RDD[U]): DStream[U] = {
557+
// because the DStream is reachable from the outer object here, and because
558+
// DStreams can't be serialized with closures, we can't proactively check
559+
// it for serializability and so we pass the optional false to SparkContext.clean
551560
val cleanedF = context.sparkContext.clean(transformFunc, false)
552561
val realTransformFunc = (rdds: Seq[RDD[_]], time: Time) => {
553562
assert(rdds.length == 1)
@@ -563,6 +572,9 @@ abstract class DStream[T: ClassTag] (
563572
def transformWith[U: ClassTag, V: ClassTag](
564573
other: DStream[U], transformFunc: (RDD[T], RDD[U]) => RDD[V]
565574
): DStream[V] = {
575+
// because the DStream is reachable from the outer object here, and because
576+
// DStreams can't be serialized with closures, we can't proactively check
577+
// it for serializability and so we pass the optional false to SparkContext.clean
566578
val cleanedF = ssc.sparkContext.clean(transformFunc, false)
567579
transformWith(other, (rdd1: RDD[T], rdd2: RDD[U], time: Time) => cleanedF(rdd1, rdd2))
568580
}
@@ -574,6 +586,9 @@ abstract class DStream[T: ClassTag] (
574586
def transformWith[U: ClassTag, V: ClassTag](
575587
other: DStream[U], transformFunc: (RDD[T], RDD[U], Time) => RDD[V]
576588
): DStream[V] = {
589+
// because the DStream is reachable from the outer object here, and because
590+
// DStreams can't be serialized with closures, we can't proactively check
591+
// it for serializability and so we pass the optional false to SparkContext.clean
577592
val cleanedF = ssc.sparkContext.clean(transformFunc, false)
578593
val realTransformFunc = (rdds: Seq[RDD[_]], time: Time) => {
579594
assert(rdds.length == 2)

0 commit comments

Comments
 (0)