update LRWithLBFGS

mengxr · mengxr · commit 456ab7c1a59f · 2014-08-14T16:32:05.000-07:00
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassification.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassification.scala
@@ -21,7 +21,7 @@ import org.apache.log4j.{Level, Logger}
 import scopt.OptionParser
 
 import org.apache.spark.{SparkConf, SparkContext}
-import org.apache.spark.mllib.classification.{LogisticRegressionWithSGD, SVMWithSGD}
+import org.apache.spark.mllib.classification.{LogisticRegressionWithLBFGS, SVMWithSGD}
 import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
 import org.apache.spark.mllib.util.MLUtils
 import org.apache.spark.mllib.optimization.{SquaredL2Updater, L1Updater}
@@ -66,7 +66,8 @@ object BinaryClassification {
         .text("number of iterations")
         .action((x, c) => c.copy(numIterations = x))
       opt[Double]("stepSize")
-        .text(s"initial step size, default: ${defaultParams.stepSize}")
+        .text("initial step size (ignored by logistic regression), " +
+          s"default: ${defaultParams.stepSize}")
         .action((x, c) => c.copy(stepSize = x))
       opt[String]("algorithm")
         .text(s"algorithm (${Algorithm.values.mkString(",")}), " +
@@ -125,10 +126,9 @@ object BinaryClassification {
 
     val model = params.algorithm match {
       case LR =>
-        val algorithm = new LogisticRegressionWithSGD()
+        val algorithm = new LogisticRegressionWithLBFGS()
         algorithm.optimizer
           .setNumIterations(params.numIterations)
-          .setStepSize(params.stepSize)
           .setUpdater(updater)
           .setRegParam(params.regParam)
         algorithm.run(training).clearThreshold()
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -73,6 +73,8 @@ class LogisticRegressionModel (
 /**
  * Train a classification model for Logistic Regression using Stochastic Gradient Descent.
  * NOTE: Labels used in Logistic Regression should be {0, 1}
+ *
+ * Using [[LogisticRegressionWithLBFGS]] is recommended over this.
  */
 class LogisticRegressionWithSGD private (
     private var stepSize: Double,
@@ -191,51 +193,19 @@ object LogisticRegressionWithSGD {
 
 /**
  * Train a classification model for Logistic Regression using Limited-memory BFGS.
+ * Standard feature scaling and L2 regularization are used by default.
  * NOTE: Labels used in Logistic Regression should be {0, 1}
  */
-class LogisticRegressionWithLBFGS private (
-    private var convergenceTol: Double,
-    private var maxNumIterations: Int,
-    private var regParam: Double)
+class LogisticRegressionWithLBFGS
   extends GeneralizedLinearAlgorithm[LogisticRegressionModel] with Serializable {
 
-  /**
-   * Construct a LogisticRegression object with default parameters
-   */
-  def this() = this(1E-4, 100, 0.0)
-
   this.setFeatureScaling(true)
 
-  private val gradient = new LogisticGradient()
-  private val updater = new SimpleUpdater()
-  // Have to return new LBFGS object every time since users can reset the parameters anytime.
-  override def optimizer = new LBFGS(gradient, updater)
-    .setNumCorrections(10)
-    .setConvergenceTol(convergenceTol)
-    .setMaxNumIterations(maxNumIterations)
-    .setRegParam(regParam)
+  override val optimizer = new LBFGS(new LogisticGradient, new SquaredL2Updater)
 
   override protected val validators = List(DataValidators.binaryLabelValidator)
 
-  /**
-   * Set the convergence tolerance of iterations for L-BFGS. Default 1E-4.
-   * Smaller value will lead to higher accuracy with the cost of more iterations.
-   */
-  def setConvergenceTol(convergenceTol: Double): this.type = {
-    this.convergenceTol = convergenceTol
-    this
-  }
-
-  /**
-   * Set the maximal number of iterations for L-BFGS. Default 100.
-   */
-  def setNumIterations(numIterations: Int): this.type = {
-    this.maxNumIterations = numIterations
-    this
-  }
-
   override protected def createModel(weights: Vector, intercept: Double) = {
     new LogisticRegressionModel(weights, intercept)
   }
-
 }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
@@ -69,8 +69,17 @@ class LBFGS(private var gradient: Gradient, private var updater: Updater)
 
   /**
    * Set the maximal number of iterations for L-BFGS. Default 100.
+   * @deprecated use [[LBFGS#setNumIterations]] instead
    */
+  @deprecated("use setNumIterations instead", "1.1.0")
   def setMaxNumIterations(iters: Int): this.type = {
+    this.setNumIterations(iters)
+  }
+
+  /**
+   * Set the maximal number of iterations for L-BFGS. Default 100.
+   */
+  def setNumIterations(iters: Int): this.type = {
     this.maxNumIterations = iters
     this
   }