@@ -155,14 +155,13 @@ object KafkaUtils {
}

/**
- * Create a RDD from the
- * Starting and ending offsets are specified in advance,
- * so that you can control exactly-once semantics.
+ * Create a RDD from Kafka using offset ranges for each topic and partition.
+ *
* @param sc SparkContext object
* @param kafkaParams Kafka <a href="http://kafka.apache.org/documentation.html#configuration">
- * configuration parameters</a>.
- * Requires "metadata.broker.list" or "bootstrap.servers" to be set with Kafka broker(s),
- * NOT zookeeper servers, specified in host1:port1,host2:port2 form.
+ * configuration parameters</a>. Requires "metadata.broker.list" or "bootstrap.servers"
+ * to be set with Kafka broker(s) (NOT zookeeper servers) specified in
+ * host1:port1,host2:port2 form.
* @param offsetRanges Each OffsetRange in the batch corresponds to a
* range of offsets for a given Kafka topic/partition
*/
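The scaladoc above only states the contract, so here is a minimal sketch of how this overload might be driven from a batch job. The topic name, broker addresses, and offsets are placeholders, and the OffsetRange factory is assumed to match the one introduced elsewhere in this patch.

import kafka.serializer.StringDecoder
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.kafka.{KafkaUtils, OffsetRange}

object BatchOffsetRangeRead {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("kafka-offset-range-read"))

    // Brokers, NOT Zookeeper, exactly as the @param doc above requires.
    val kafkaParams = Map("metadata.broker.list" -> "host1:9092,host2:9092")

    // One OffsetRange per topic/partition; topic and offsets are made up for the sketch.
    val offsetRanges = Array(
      OffsetRange("pageviews", 0, 0L, 100L),
      OffsetRange("pageviews", 1, 0L, 100L))

    val rdd = KafkaUtils.createRDD[String, String, StringDecoder, StringDecoder](
      sc, kafkaParams, offsetRanges)

    rdd.map(_._2).take(10).foreach(println)
    sc.stop()
  }
}

Because the offsets are fixed up front, re-running the job re-reads exactly the same records, which is what the original doc meant by controlling exactly-once semantics.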
@@ -186,18 +185,21 @@ object KafkaUtils {
new KafkaRDD[K, V, KD, VD, (K, V)](sc, kafkaParams, offsetRanges, leaders, messageHandler)
}

- /** A batch-oriented interface for consuming from Kafka.
- * Starting and ending offsets are specified in advance,
- * so that you can control exactly-once semantics.
+ /**
+ * :: Experimental ::
+ * Create a RDD from Kafka using offset ranges for each topic and partition. This allows you to
+ * specify the Kafka leader to connect to (to optimize fetching) and access the message as well
+ * as the metadata.
+ *
* @param sc SparkContext object
* @param kafkaParams Kafka <a href="http://kafka.apache.org/documentation.html#configuration">
- * configuration parameters</a>.
- * Requires "metadata.broker.list" or "bootstrap.servers" to be set with Kafka broker(s),
- * NOT zookeeper servers, specified in host1:port1,host2:port2 form.
+ * configuration parameters</a>. Requires "metadata.broker.list" or "bootstrap.servers"
+ * to be set with Kafka broker(s) (NOT zookeeper servers) specified in
+ * host1:port1,host2:port2 form.
* @param offsetRanges Each OffsetRange in the batch corresponds to a
* range of offsets for a given Kafka topic/partition
* @param leaders Kafka leaders for each offset range in batch
- * @param messageHandler function for translating each message into the desired type
+ * @param messageHandler function for translating each message and metadata into the desired type
*/
@Experimental
def createRDD[
@@ -219,47 +221,73 @@ object KafkaUtils {
}
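For the experimental overload documented in the previous hunk, a rough sketch follows. The Leader construction is an assumption (the factory isn't shown in this hunk and may differ in this revision), and the topic, hosts, and offsets are placeholders; the point is that the message handler receives the full MessageAndMetadata rather than just the key/value pair.

import kafka.message.MessageAndMetadata
import kafka.serializer.StringDecoder
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.kafka.{KafkaUtils, Leader, OffsetRange}

object BatchReadWithMetadata {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("kafka-read-with-metadata"))
    val kafkaParams = Map("metadata.broker.list" -> "host1:9092,host2:9092")
    val offsetRanges = Array(OffsetRange("pageviews", 0, 0L, 100L))

    // Hypothetical factory call: how a Leader is built may differ in this revision.
    val leaders = Array(Leader.create("pageviews", 0, "host1", 9092))

    // Carry topic/partition/offset along with the decoded payload via the metadata.
    val handler = (mmd: MessageAndMetadata[String, String]) =>
      (mmd.topic, mmd.partition, mmd.offset, mmd.message())

    val rdd = KafkaUtils.createRDD[String, String, StringDecoder, StringDecoder,
        (String, Int, Long, String)](sc, kafkaParams, offsetRanges, leaders, handler)

    rdd.take(5).foreach(println)
    sc.stop()
  }
}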

+ /**
+ * Create a RDD from Kafka using offset ranges for each topic and partition.
+ *
+ * @param jsc JavaSparkContext object
+ * @param kafkaParams Kafka <a href="http://kafka.apache.org/documentation.html#configuration">
+ * configuration parameters</a>. Requires "metadata.broker.list" or "bootstrap.servers"
+ * to be set with Kafka broker(s) (NOT zookeeper servers) specified in
+ * host1:port1,host2:port2 form.
+ * @param offsetRanges Each OffsetRange in the batch corresponds to a
+ * range of offsets for a given Kafka topic/partition
+ */
@Experimental
- def createRDD[K, V, KD <: Decoder[K], VD <: Decoder[V], R](
+ def createRDD[K, V, KD <: Decoder[K], VD <: Decoder[V]](
jsc: JavaSparkContext,
keyClass: Class[K],
valueClass: Class[V],
keyDecoderClass: Class[KD],
valueDecoderClass: Class[VD],
- recordClass: Class[R],
kafkaParams: JMap[String, String],
- offsetRanges: Array[OffsetRange],
- leaders: Array[Leader],
- messageHandler: JFunction[MessageAndMetadata[K, V], R]
- ): JavaRDD[R] = {
+ offsetRanges: Array[OffsetRange]
+ ): JavaPairRDD[K, V] = {
implicit val keyCmt: ClassTag[K] = ClassTag(keyClass)
implicit val valueCmt: ClassTag[V] = ClassTag(valueClass)
implicit val keyDecoderCmt: ClassTag[KD] = ClassTag(keyDecoderClass)
implicit val valueDecoderCmt: ClassTag[VD] = ClassTag(valueDecoderClass)
- implicit val recordCmt: ClassTag[R] = ClassTag(recordClass)
- createRDD[K, V, KD, VD, R](
- jsc.sc, Map(kafkaParams.toSeq: _*), offsetRanges, leaders, messageHandler.call _)
+ new JavaPairRDD(createRDD[K, V, KD, VD](
+ jsc.sc, Map(kafkaParams.toSeq: _*), offsetRanges))
}
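The Java-friendly overload above takes Class objects in place of the ClassTags the Scala version infers. Below is a small sketch of a call through the JavaSparkContext-based signature, written in Scala for consistency with the rest of the file; brokers, topic, and offsets are again placeholders.

import java.util.{HashMap => JHashMap}

import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.streaming.kafka.{KafkaUtils, OffsetRange}

object JavaFriendlyBatchRead {
  def main(args: Array[String]): Unit = {
    val jsc = new JavaSparkContext(new SparkConf().setAppName("kafka-java-batch"))

    val kafkaParams = new JHashMap[String, String]()
    kafkaParams.put("metadata.broker.list", "host1:9092,host2:9092")

    val offsetRanges = Array(OffsetRange("pageviews", 0, 0L, 100L))

    // The Class arguments supply what the Scala overload gets from implicit ClassTags.
    val pairRDD = KafkaUtils.createRDD(
      jsc,
      classOf[String], classOf[String],
      classOf[StringDecoder], classOf[StringDecoder],
      kafkaParams, offsetRanges)

    println(pairRDD.count())
    jsc.stop()
  }
}

From Java the call looks the same, except kafkaParams is any java.util.Map and the result is a JavaPairRDD<String, String>.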

+ /**
+ * :: Experimental ::
+ * Create a RDD from Kafka using offset ranges for each topic and partition. This allows you to
+ * specify the Kafka leader to connect to (to optimize fetching) and access the message as well
+ * as the metadata.
+ *
+ * @param jsc JavaSparkContext object
+ * @param kafkaParams Kafka <a href="http://kafka.apache.org/documentation.html#configuration">
+ * configuration parameters</a>. Requires "metadata.broker.list" or "bootstrap.servers"
+ * to be set with Kafka broker(s) (NOT zookeeper servers) specified in
+ * host1:port1,host2:port2 form.
+ * @param offsetRanges Each OffsetRange in the batch corresponds to a
+ * range of offsets for a given Kafka topic/partition
+ * @param leaders Kafka leaders for each offset range in batch
+ * @param messageHandler function for translating each message and metadata into the desired type
+ */
@Experimental
- def createRDD[K, V, KD <: Decoder[K], VD <: Decoder[V]](
+ def createRDD[K, V, KD <: Decoder[K], VD <: Decoder[V], R](
jsc: JavaSparkContext,
keyClass: Class[K],
valueClass: Class[V],
keyDecoderClass: Class[KD],
valueDecoderClass: Class[VD],
+ recordClass: Class[R],
kafkaParams: JMap[String, String],
- offsetRanges: Array[OffsetRange]
- ): JavaPairRDD[K, V] = {
+ offsetRanges: Array[OffsetRange],
+ leaders: Array[Leader],
+ messageHandler: JFunction[MessageAndMetadata[K, V], R]
+ ): JavaRDD[R] = {
implicit val keyCmt: ClassTag[K] = ClassTag(keyClass)
implicit val valueCmt: ClassTag[V] = ClassTag(valueClass)
implicit val keyDecoderCmt: ClassTag[KD] = ClassTag(keyDecoderClass)
implicit val valueDecoderCmt: ClassTag[VD] = ClassTag(valueDecoderClass)
- new JavaPairRDD(createRDD[K, V, KD, VD](
- jsc.sc, Map(kafkaParams.toSeq: _*), offsetRanges))
+ implicit val recordCmt: ClassTag[R] = ClassTag(recordClass)
+ createRDD[K, V, KD, VD, R](
+ jsc.sc, Map(kafkaParams.toSeq: _*), offsetRanges, leaders, messageHandler.call _)
}

-
/**
* :: Experimental ::
* Create an input stream that pulls messages from a Kafka Broker. This stream can guarantee
@@ -270,7 +298,7 @@ object KafkaUtils {
* - Offsets: This does not use Zookeeper to store offsets. The consumed offsets are tracked
* by the stream itself. For interoperability with Kafka monitoring tools that depend on
* Zookeeper, you have to update Kafka/Zookeeper yourself from the streaming application.
- * - Failure Recovery: To recover from driver failures, you have to enable checkpointing
+ * - Failure Recovery: To recover from driver failures, you have to enable checkpointing
* in the [[StreamingContext]]. The information on consumed offset can be
* recovered from the checkpoint. See the programming guide for details (constraints, etc.).
* - End-to-end semantics: This stream ensures that every records is effectively received and
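Since the doc block above says driver-failure recovery relies on checkpointing the StreamingContext, here is a minimal sketch of the pattern it implies, built around the direct stream this file provides. It assumes the createDirectStream signature as it eventually shipped in Spark 1.3 (key/value types, decoders, a kafkaParams map, and a topic set); the checkpoint directory, brokers, and topic are placeholders.

import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka.KafkaUtils

object DirectStreamWithCheckpointing {
  val checkpointDir = "/tmp/kafka-direct-checkpoint"  // placeholder path

  def createContext(): StreamingContext = {
    val ssc = new StreamingContext(new SparkConf().setAppName("kafka-direct"), Seconds(10))
    ssc.checkpoint(checkpointDir)  // enables the driver-failure recovery described above

    val kafkaParams = Map("metadata.broker.list" -> "host1:9092,host2:9092")
    val stream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
      ssc, kafkaParams, Set("pageviews"))

    stream.map(_._2).count().print()
    ssc
  }

  def main(args: Array[String]): Unit = {
    // After a driver restart, the context (including the offsets the stream tracks itself)
    // is rebuilt from the checkpoint rather than from createContext.
    val ssc = StreamingContext.getOrCreate(checkpointDir, createContext _)
    ssc.start()
    ssc.awaitTermination()
  }
}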
@@ -375,7 +403,7 @@ object KafkaUtils {
* - Offsets: This does not use Zookeeper to store offsets. The consumed offsets are tracked
* by the stream itself. For interoperability with Kafka monitoring tools that depend on
* Zookeeper, you have to update Kafka/Zookeeper yourself from the streaming application.
- * - Failure Recovery: To recover from driver failures, you have to enable checkpointing
+ * - Failure Recovery: To recover from driver failures, you have to enable checkpointing
* in the [[StreamingContext]]. The information on consumed offset can be
* recovered from the checkpoint. See the programming guide for details (constraints, etc.).
* - End-to-end semantics: This stream ensures that every records is effectively received and
@@ -433,7 +461,7 @@ object KafkaUtils {
* - Offsets: This does not use Zookeeper to store offsets. The consumed offsets are tracked
* by the stream itself. For interoperability with Kafka monitoring tools that depend on
* Zookeeper, you have to update Kafka/Zookeeper yourself from the streaming application.
- * - Failure Recovery: To recover from driver failures, you have to enable checkpointing
+ * - Failure Recovery: To recover from driver failures, you have to enable checkpointing
* in the [[StreamingContext]]. The information on consumed offset can be
* recovered from the checkpoint. See the programming guide for details (constraints, etc.).
* - End-to-end semantics: This stream ensures that every records is effectively received and