Fix setOptimizer to accept "online" and round the online mini-batch size up

hhbyyh · hhbyyh · commit 6149ca6438b0 · 2015-05-02T00:48:14.000+08:00
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
@@ -210,14 +210,15 @@ class LDA private (
 
   /**
    * Set the LDAOptimizer used to perform the actual calculation by algorithm name.
-   * Currently "em" is supported.
+   * Currently "em" and "online" are supported.
    */
   def setOptimizer(optimizerName: String): this.type = {
     this.ldaOptimizer =
       optimizerName.toLowerCase match {
         case "em" => new EMLDAOptimizer
+        case "online" => new OnlineLDAOptimizer
         case other =>
-          throw new IllegalArgumentException(s"Only em is supported but got $other.")
+          throw new IllegalArgumentException(s"Only em, online are supported but got $other.")
       }
     this
   }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
@@ -396,7 +396,7 @@ class OnlineLDAOptimizer extends LDAOptimizer {
     val batchResult = statsSum :* expElogbeta
 
     // Note that this is an optimization to avoid batch.count
-    update(batchResult, iteration, (miniBatchFraction * corpusSize).toInt)
+    update(batchResult, iteration, (miniBatchFraction * corpusSize).ceil.toInt)
     this
   }
 

Original file line number	Diff line number	Diff line change
`@@ -396,7 +396,7 @@ class OnlineLDAOptimizer extends LDAOptimizer {`
`396`	`396`	`val batchResult = statsSum :* expElogbeta`
`397`	`397`
`398`	`398`	`// Note that this is an optimization to avoid batch.count`
`399`		`- update(batchResult, iteration, (miniBatchFraction * corpusSize).toInt)`
	`399`	`+ update(batchResult, iteration, (miniBatchFraction * corpusSize).ceil.toInt)`
`400`	`400`	`this`
`401`	`401`	`}`
`402`	`402`