@@ -22,8 +22,6 @@ import java.net.InetSocketAddress
 import java.nio.ByteBuffer
 import java.util.concurrent.{LinkedBlockingQueue, TimeUnit, Executors}
 
-import org.apache.spark.flume.sink.SparkSinkUtils
-
 import scala.collection.JavaConversions._
 import scala.reflect.ClassTag
 
@@ -33,45 +31,44 @@ import org.apache.avro.ipc.specific.SpecificRequestor
 import org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory
 
 import org.apache.spark.Logging
-import org.apache.spark.flume.{SparkSinkEvent, SparkFlumeProtocol}
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.streaming.StreamingContext
 import org.apache.spark.streaming.dstream.ReceiverInputDStream
 import org.apache.spark.streaming.receiver.Receiver
+import org.apache.spark.streaming.flume.sink._
+
 
 /**
  * A [[ReceiverInputDStream]] that can be used to read data from several Flume agents running
  * [[org.apache.spark.flume.sink.SparkSink]]s.
- * @param ssc_ Streaming context that will execute this input stream
+ * @param _ssc Streaming context that will execute this input stream
  * @param addresses List of addresses at which SparkSinks are listening
  * @param maxBatchSize Maximum size of a batch
  * @param parallelism Number of parallel connections to open
  * @param storageLevel The storage level to use.
  * @tparam T Class type of the object of this stream
  */
+private[streaming]
 class FlumePollingInputDStream[T: ClassTag](
-    @transient ssc_ : StreamingContext,
-    val addresses: Seq[InetSocketAddress],
-    val maxBatchSize: Int,
-    val parallelism: Int,
-    storageLevel: StorageLevel
-  ) extends ReceiverInputDStream[SparkFlumePollingEvent](ssc_) {
-  /**
-   * Gets the receiver object that will be sent to the worker nodes
-   * to receive data. This method needs to defined by any specific implementation
-   * of a NetworkInputDStream.
-   */
+    @transient _ssc: StreamingContext,
+    val addresses: Seq[InetSocketAddress],
+    val maxBatchSize: Int,
+    val parallelism: Int,
+    storageLevel: StorageLevel
+  ) extends ReceiverInputDStream[SparkFlumePollingEvent](_ssc) {
+
   override def getReceiver(): Receiver[SparkFlumePollingEvent] = {
     new FlumePollingReceiver(addresses, maxBatchSize, parallelism, storageLevel)
   }
 }
 
-private[streaming] class FlumePollingReceiver(
-  addresses: Seq[InetSocketAddress],
-  maxBatchSize: Int,
-  parallelism: Int,
-  storageLevel: StorageLevel
-) extends Receiver[SparkFlumePollingEvent](storageLevel) with Logging {
+private[streaming]
+class FlumePollingReceiver(
+    addresses: Seq[InetSocketAddress],
+    val maxBatchSize: Int,
+    val parallelism: Int,
+    storageLevel: StorageLevel
+  ) extends Receiver[SparkFlumePollingEvent](storageLevel) with Logging {
 
   lazy val channelFactoryExecutor =
     Executors.newCachedThreadPool(new ThreadFactoryBuilder().setDaemon(true).
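For context on the hunk above: with the class now `private[streaming]`, applications can no longer construct `FlumePollingInputDStream` directly and would go through a utility instead. A minimal usage sketch under that assumption; the `FlumeUtils.createPollingStream` helper and its parameter order are assumptions for illustration, not part of this diff.

import java.net.InetSocketAddress

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
// Assumed utility entry point; not introduced by this diff.
import org.apache.spark.streaming.flume.FlumeUtils

object FlumePollingExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("FlumePollingExample")
    val ssc = new StreamingContext(conf, Seconds(2))

    // Addresses where Flume agents run SparkSink; a pool of `parallelism`
    // clients polls them for batches of up to `maxBatchSize` events.
    val addresses = Seq(new InetSocketAddress("localhost", 9999))
    val stream = FlumeUtils.createPollingStream(
      ssc, addresses, StorageLevel.MEMORY_AND_DISK_SER_2,
      maxBatchSize = 1000, parallelism = 5)

    // Each element is a SparkFlumePollingEvent wrapping a SparkSinkEvent.
    stream.map(e => new String(e.event.getBody.array())).print()
    ssc.start()
    ssc.awaitTermination()
  }
}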
@@ -150,14 +147,6 @@ private[streaming] class FlumePollingReceiver(
     }
   }
 
-  override def store(dataItem: SparkFlumePollingEvent) {
-    // Not entirely sure store is thread-safe for all storage levels - so wrap it in synchronized
-    // This takes a performance hit, since the parallelism is useful only for pulling data now.
-    this.synchronized {
-      super.store(dataItem)
-    }
-  }
-
   override def onStop(): Unit = {
     logInfo("Shutting down Flume Polling Receiver")
     receiverExecutor.shutdownNow()
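On the block removed above: its inline comment carries the reasoning, namely that `store` was not known to be thread-safe for every storage level, so every call from the parallel puller threads was funneled through one lock, serializing the writes. A minimal sketch of that pattern and its cost, with all names hypothetical:

import java.util.concurrent.{Executors, TimeUnit}

// Hypothetical sink whose store() is not known to be thread-safe, wrapped
// the way the removed override wrapped super.store(): one shared lock.
class SynchronizedSink {
  def store(item: String): Unit = this.synchronized {
    // Safe under concurrency, but N puller threads now write one at a time,
    // so parallelism helps only the pulling side, as the removed comment notes.
    Thread.sleep(1) // stand-in for the actual write
  }
}

object SyncStoreDemo {
  def main(args: Array[String]): Unit = {
    val sink = new SynchronizedSink
    val pool = Executors.newFixedThreadPool(5) // like `parallelism` pullers
    (1 to 100).foreach { i =>
      pool.execute(new Runnable {
        override def run(): Unit = sink.store("event-" + i)
      })
    }
    pool.shutdown()
    pool.awaitTermination(10, TimeUnit.SECONDS)
  }
}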
@@ -176,6 +165,9 @@ private[streaming] class FlumePollingReceiver(
 private class FlumeConnection(val transceiver: NettyTransceiver,
   val client: SparkFlumeProtocol.Callback)
 
+/**
+ * Companion object of [[SparkFlumePollingEvent]]
+ */
 private[streaming] object SparkFlumePollingEvent {
   def fromSparkSinkEvent(in: SparkSinkEvent): SparkFlumePollingEvent = {
     val event = new SparkFlumePollingEvent()
@@ -189,7 +181,7 @@ private[streaming] object SparkFlumePollingEvent {
  * SparkSinkEvent is identical to AvroFlumeEvent, we need to create a new class and a wrapper
  * around that to make it externalizable.
  */
-class SparkFlumePollingEvent() extends Externalizable with Logging {
+class SparkFlumePollingEvent extends Externalizable with Logging {
   var event: SparkSinkEvent = new SparkSinkEvent()
 
   /* De-serialize from bytes. */
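The scaladoc above explains why the wrapper exists: the Avro-generated SparkSinkEvent is not serializable in the form Spark needs, so the wrapper implements `Externalizable` by hand. A minimal sketch of that pattern with a hypothetical event type (headers plus a byte-array body); the field layout is illustrative, not this class's exact wire format:

import java.io.{Externalizable, ObjectInput, ObjectOutput}

// Hypothetical event showing the Externalizable pattern: write each field
// out explicitly, and rebuild the object in readExternal.
class SimpleEvent(var headers: Map[String, String], var body: Array[Byte])
  extends Externalizable {

  // Externalizable requires a public no-arg constructor for deserialization.
  def this() = this(Map.empty, Array.empty)

  override def writeExternal(out: ObjectOutput): Unit = {
    out.writeInt(body.length)
    out.write(body)
    out.writeInt(headers.size)
    headers.foreach { case (k, v) =>
      out.writeUTF(k)
      out.writeUTF(v)
    }
  }

  override def readExternal(in: ObjectInput): Unit = {
    body = new Array[Byte](in.readInt())
    in.readFully(body)
    headers = (1 to in.readInt()).map(_ => in.readUTF() -> in.readUTF()).toMap
  }
}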