@@ -76,14 +76,6 @@ private[spark] class DirectKafkaInputDStream[K, V](
 
   @transient val serviceConsumer: Consumer[K, V] = consumerStrategy.serviceConsumer
 
-  def consumerForAssign(): KafkaConsumer[Long, String] = this.synchronized {
-    val properties = consumerStrategy.executorKafkaParams
-    properties.put("max.poll.records", "1")
-    properties.put(ConsumerConfig.GROUP_ID_CONFIG,
-      s"${properties.get(ConsumerConfig.GROUP_ID_CONFIG)}_assignGroup")
-    new KafkaConsumer[Long, String](properties)
-  }
-
   override def persist(newLevel: StorageLevel): DStream[ConsumerRecord[K, V]] = {
     logError("Kafka ConsumerRecord is not serializable. " +
       "Use .map to extract fields before calling .persist or .window")
@@ -288,26 +280,27 @@ private[spark] class DirectKafkaInputDStream[K, V](
 
   override def start(): Unit = {
     val c = consumer
-    val consumerAssign = consumerForAssign
     val pollTimeout = ssc.sparkContext.getConf
-      .getLong("spark.streaming.kafka.consumer.driver.poll.ms", 120000)
+      .getLong("spark.streaming.kafka.consumer.driver.poll.ms", 5000)
     paranoidPoll(c)
     if (currentOffsets.isEmpty) {
       currentOffsets = c.assignment().asScala.map { tp =>
         tp -> {
           val position = c.position(tp)
 
-          consumerAssign.assign(ju.Arrays.asList(tp))
-          val records = consumerAssign.poll(pollTimeout).iterator()
+          serviceConsumer.assign(ju.Arrays.asList(tp))
+          val records = serviceConsumer.poll(pollTimeout).iterator()
           val firstRecordOffset = if (records.hasNext) {
             records.next().offset()
           } else {
             c.endOffsets(ju.Arrays.asList(tp)).get(tp).longValue()
           }
 
           if (position < firstRecordOffset) {
+            serviceConsumer.seek(tp, firstRecordOffset)
             firstRecordOffset
           } else {
+            serviceConsumer.seek(tp, position)
             position
           }
         }
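
The start() change above reuses the shared serviceConsumer to find, per partition, the first offset that is still available and to seek past anything already removed by retention. Below is a minimal, self-contained sketch of that idea; the helper name, the byte-array key/value types, and the Duration-based poll are assumptions for illustration, not part of this commit.

import java.{util => ju}
import java.time.Duration

import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.TopicPartition

object StartingOffsetSketch {
  // Resolve a safe starting offset for one partition: poll a single record to
  // learn the first offset that still exists (falling back to the end offset
  // when the partition is empty), never start before it, and leave the
  // consumer positioned at the chosen offset.
  def resolveStartingOffset(
      serviceConsumer: KafkaConsumer[Array[Byte], Array[Byte]],
      tp: TopicPartition,
      position: Long,
      pollTimeoutMs: Long): Long = {
    serviceConsumer.assign(ju.Arrays.asList(tp))
    val records = serviceConsumer.poll(Duration.ofMillis(pollTimeoutMs)).iterator()
    val firstRecordOffset =
      if (records.hasNext) records.next().offset()
      else serviceConsumer.endOffsets(ju.Arrays.asList(tp)).get(tp).longValue()
    val start = math.max(position, firstRecordOffset)
    serviceConsumer.seek(tp, start)
    start
  }
}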