File tree Expand file tree Collapse file tree 1 file changed +2
-2
lines changed
mllib/src/main/scala/org/apache/spark/mllib/clustering Expand file tree Collapse file tree 1 file changed +2
-2
lines changed Original file line number Diff line number Diff line change @@ -32,6 +32,7 @@ import org.apache.spark.mllib.impl.PeriodicGraphCheckpointer
32
32
import org .apache .spark .mllib .linalg .{Vector , DenseVector , SparseVector , Matrices }
33
33
import org .apache .spark .rdd .RDD
34
34
import org .apache .spark .util .Utils
35
+ import org .apache .spark .mllib .rdd .RDDFunctions ._
35
36
36
37
37
38
/**
@@ -430,8 +431,7 @@ private[clustering] object LDA {
430
431
else if (D / 1000 < 4 ) 4
431
432
else D / 1000
432
433
val batchNumber = (D / batchSize + 1 ).toInt
433
- // todo: performance killer, need to be replaced
434
- private val batches = documents.randomSplit(Array .fill[Double ](batchNumber)(1.0 ))
434
+ private val batches = documents.sliding(batchNumber).collect()
435
435
436
436
// Initialize the variational distribution q(beta|lambda)
437
437
var _lambda = getGammaMatrix(k, vocabSize) // K * V
You can’t perform that action at this time.
0 commit comments