From 3ac8874c351a228b596dfac018eb1578876aebef Mon Sep 17 00:00:00 2001
From: Michael Giannakopoulos <miccagiann@gmail.com>
Date: Sun, 27 Jul 2014 22:23:06 -0400
Subject: [PATCH 1/9] Added support for regularizer and intercept parameters
 for linear regression method.

---
 .../mllib/api/python/PythonMLLibAPI.scala     | 59 +++++++++++++++++++
 python/pyspark/mllib/regression.py            | 17 ++++++
 2 files changed, 76 insertions(+)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 954621ee8b933..a4170dbd8a20a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -24,6 +24,7 @@ import org.apache.spark.api.java.{JavaSparkContext, JavaRDD}
 import org.apache.spark.mllib.classification._
 import org.apache.spark.mllib.clustering._
 import org.apache.spark.mllib.linalg.{SparseVector, Vector, Vectors}
+import org.apache.spark.mllib.optimization._
 import org.apache.spark.mllib.recommendation._
 import org.apache.spark.mllib.regression._
 import org.apache.spark.mllib.util.MLUtils
@@ -261,6 +262,64 @@ class PythonMLLibAPI extends Serializable {
       initialWeightsBA)
   }
 
+  /**
+   * Java stub for Python mllib LinearRegressionWithSGD.train() function
+   * allowing users to define the regularizer and intercept parameters using L2
+   * optimization.
+   */
+  def trainLinearRegressionModelWithSGDL2Opt(
+      dataBytesJRDD: JavaRDD[Array[Byte]],
+      numIterations: Int,
+      stepSize: Double,
+      regParam: Double,
+      intercept: Boolean,
+      miniBatchFraction: Double,
+      initialWeightsBA: Array[Byte]): java.util.List[java.lang.Object] = {
+    val lrAlg = new LinearRegressionWithSGD()
+    lrAlg.setIntercept(intercept)
+    lrAlg.optimizer.
+      setNumIterations(numIterations).
+      setRegParam(regParam).
+      setStepSize(stepSize).
+      setUpdater(new SquaredL2Updater)
+    trainRegressionModel(
+      (data, initialWeights) =>
+        lrAlg.run(
+          data,
+          initialWeights),
+      dataBytesJRDD,
+      initialWeightsBA)
+  }
+  
+  /**
+   * Java stub for Python mllib LinearRegressionWithSGD.train() function
+   * allowing users to define the regularizer and intercept parameters using L1
+   * optimization.
+   */
+  def trainLinearRegressionModelWithSGDL1Opt(
+      dataBytesJRDD: JavaRDD[Array[Byte]],
+      numIterations: Int,
+      stepSize: Double,
+      regParam: Double,
+      intercept: Boolean,
+      miniBatchFraction: Double,
+      initialWeightsBA: Array[Byte]): java.util.List[java.lang.Object] = {
+    val lrAlg = new LinearRegressionWithSGD()
+    lrAlg.setIntercept(intercept)
+    lrAlg.optimizer.
+      setNumIterations(numIterations).
+      setRegParam(regParam).
+      setStepSize(stepSize).
+      setUpdater(new L1Updater)
+    trainRegressionModel(
+      (data, initialWeights) =>
+        lrAlg.run(
+          data,
+          initialWeights),
+      dataBytesJRDD,
+      initialWeightsBA)
+  }
+
   /**
    * Java stub for Python mllib LassoWithSGD.train()
    */
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index b84bc531dec8c..fa50407909c7a 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -120,6 +120,23 @@ def train(cls, data, iterations=100, step=1.0,
             d._jrdd, iterations, step, miniBatchFraction, i)
         return _regression_train_wrapper(sc, train_f, LinearRegressionModel, data, initialWeights)
 
+    @classmethod
+    def trainL2Opt(cls, data, iterations=100, step=1.0, regParam=1.0,
+                   intercept=False, miniBatchFraction=1.0, initialWeights=None):
+        """Train a linear regression model on the given data using L2 optimizer."""
+        sc = data.context
+        train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGDL2Opt(
+            d._jrdd, iterations, step, regParam, intercept, miniBatchFraction, i)
+        return _regression_train_wrapper(sc, train_f, LinearRegressionModel, data, initialWeights)
+
+    @classmethod
+    def trainL1Opt(cls, data, iterations=100, step=1.0, regParam=1.0,
+                   intercept=False, miniBatchFraction=1.0, initialWeights=None):
+        """Train a linear regression model on the given data using L1 optimizer."""
+        sc = data.context
+        train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGDL1Opt(
+            d._jrdd, iterations, step, regParam, intercept, miniBatchFraction, i)
+        return _regression_train_wrapper(sc, train_f, LinearRegressionModel, data, initialWeights)
 
 class LassoModel(LinearRegressionModelBase):
     """A linear regression model derived from a least-squares fit with an

From 78853ec7fa680e8256e94a64c43dd4f7b886af89 Mon Sep 17 00:00:00 2001
From: Michael Giannakopoulos <miccagiann@gmail.com>
Date: Wed, 30 Jul 2014 00:55:39 -0400
Subject: [PATCH 2/9] Providing intercept and regularizer functionality for
 linear methods in only one function.

---
 .../mllib/api/python/PythonMLLibAPI.scala     | 74 +++++--------------
 python/pyspark/mllib/regression.py            | 44 +++++------
 2 files changed, 39 insertions(+), 79 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index a4170dbd8a20a..fbce412c773ef 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -43,6 +43,16 @@ class PythonMLLibAPI extends Serializable {
   private val DENSE_MATRIX_MAGIC: Byte = 3
   private val LABELED_POINT_MAGIC: Byte = 4
 
+  /**
+   * Enumeration used to define the type of Regularizer
+   * used for linear methods.
+   */
+  object RegularizerType extends Serializable {
+    val L2 : Int = 0
+    val L1 : Int = 1
+    val NONE : Int = 2
+  }
+
   private[python] def deserializeDoubleVector(bytes: Array[Byte], offset: Int = 0): Vector = {
     require(bytes.length - offset >= 5, "Byte array too short")
     val magic = bytes(offset)
@@ -245,33 +255,11 @@ class PythonMLLibAPI extends Serializable {
    * Java stub for Python mllib LinearRegressionWithSGD.train()
    */
   def trainLinearRegressionModelWithSGD(
-      dataBytesJRDD: JavaRDD[Array[Byte]],
-      numIterations: Int,
-      stepSize: Double,
-      miniBatchFraction: Double,
-      initialWeightsBA: Array[Byte]): java.util.List[java.lang.Object] = {
-    trainRegressionModel(
-      (data, initialWeights) =>
-        LinearRegressionWithSGD.train(
-          data,
-          numIterations,
-          stepSize,
-          miniBatchFraction,
-          initialWeights),
-      dataBytesJRDD,
-      initialWeightsBA)
-  }
-
-  /**
-   * Java stub for Python mllib LinearRegressionWithSGD.train() function
-   * allowing users to define the regularizer and intercept parameters using L2
-   * optimization.
-   */
-  def trainLinearRegressionModelWithSGDL2Opt(
       dataBytesJRDD: JavaRDD[Array[Byte]],
       numIterations: Int,
       stepSize: Double,
       regParam: Double,
+      regType: Int,
       intercept: Boolean,
       miniBatchFraction: Double,
       initialWeightsBA: Array[Byte]): java.util.List[java.lang.Object] = {
@@ -280,42 +268,14 @@ class PythonMLLibAPI extends Serializable {
     lrAlg.optimizer.
       setNumIterations(numIterations).
       setRegParam(regParam).
-      setStepSize(stepSize).
-      setUpdater(new SquaredL2Updater)
+      setStepSize(stepSize)
+    if (regType == RegularizerType.L2)
+      lrAlg.optimizer.setUpdater(new SquaredL2Updater)
+    else if (regType == RegularizerType.L1)
+      lrAlg.optimizer.setUpdater(new L1Updater)
     trainRegressionModel(
       (data, initialWeights) =>
-        lrAlg.run(
-          data,
-          initialWeights),
-      dataBytesJRDD,
-      initialWeightsBA)
-  }
-  
-  /**
-   * Java stub for Python mllib LinearRegressionWithSGD.train() function
-   * allowing users to define the regularizer and intercept parameters using L1
-   * optimization.
-   */
-  def trainLinearRegressionModelWithSGDL1Opt(
-      dataBytesJRDD: JavaRDD[Array[Byte]],
-      numIterations: Int,
-      stepSize: Double,
-      regParam: Double,
-      intercept: Boolean,
-      miniBatchFraction: Double,
-      initialWeightsBA: Array[Byte]): java.util.List[java.lang.Object] = {
-    val lrAlg = new LinearRegressionWithSGD()
-    lrAlg.setIntercept(intercept)
-    lrAlg.optimizer.
-      setNumIterations(numIterations).
-      setRegParam(regParam).
-      setStepSize(stepSize).
-      setUpdater(new L1Updater)
-    trainRegressionModel(
-      (data, initialWeights) =>
-        lrAlg.run(
-          data,
-          initialWeights),
+        lrAlg.run(data, initialWeights),
       dataBytesJRDD,
       initialWeightsBA)
   }
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index fa50407909c7a..644ae849d2033 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -109,33 +109,33 @@ class LinearRegressionModel(LinearRegressionModelBase):
     True
     """
 
+class RegularizerType(object):
+    L2 = 0
+    L1 = 1
+    NONE = 2
 
 class LinearRegressionWithSGD(object):
     @classmethod
-    def train(cls, data, iterations=100, step=1.0,
-              miniBatchFraction=1.0, initialWeights=None):
+    def train(cls, data, iterations=100, step=1.0, regParam=1.0, regType=None,
+              intercept=False, miniBatchFraction=1.0, initialWeights=None):
         """Train a linear regression model on the given data."""
         sc = data.context
-        train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGD(
-            d._jrdd, iterations, step, miniBatchFraction, i)
-        return _regression_train_wrapper(sc, train_f, LinearRegressionModel, data, initialWeights)
-
-    @classmethod
-    def trainL2Opt(cls, data, iterations=100, step=1.0, regParam=1.0,
-                   intercept=False, miniBatchFraction=1.0, initialWeights=None):
-        """Train a linear regression model on the given data using L2 optimizer."""
-        sc = data.context
-        train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGDL2Opt(
-            d._jrdd, iterations, step, regParam, intercept, miniBatchFraction, i)
-        return _regression_train_wrapper(sc, train_f, LinearRegressionModel, data, initialWeights)
-
-    @classmethod
-    def trainL1Opt(cls, data, iterations=100, step=1.0, regParam=1.0,
-                   intercept=False, miniBatchFraction=1.0, initialWeights=None):
-        """Train a linear regression model on the given data using L1 optimizer."""
-        sc = data.context
-        train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGDL1Opt(
-            d._jrdd, iterations, step, regParam, intercept, miniBatchFraction, i)
+        if regType is None:
+            train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGD(
+                d._jrdd, iterations, step, regParam, sc._jvm.PythonMLLibAPI().RegularizerType().NONE(),
+                intercept, miniBatchFraction, i)
+        elif regType == RegularizerType.L2:
+            train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGD(
+                d._jrdd, iterations, step, regParam, sc._jvm.PythonMLLibAPI().RegularizerType().L2(),
+                intercept, miniBatchFraction, i)
+        elif regType == RegularizerType.L1:
+            train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGD(
+                d._jrdd, iterations, step, regParam, sc._jvm.PythonMLLibAPI().RegularizerType().L1(),
+                intercept, miniBatchFraction, i)
+        elif regType == RegularizerType.NONE:
+            train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGD(
+                d._jrdd, iterations, step, regParam, sc._jvm.PythonMLLibAPI().RegularizerType().NONE(),
+                intercept, miniBatchFraction, i)
         return _regression_train_wrapper(sc, train_f, LinearRegressionModel, data, initialWeights)
 
 class LassoModel(LinearRegressionModelBase):

From b962744906857b8096df9beb119373b4a3bf4bf9 Mon Sep 17 00:00:00 2001
From: Michael Giannakopoulos <miccagiann@gmail.com>
Date: Wed, 30 Jul 2014 20:24:42 -0400
Subject: [PATCH 3/9] Replaced the enum classes with string keywords for
 defining the values of the 'regType' parameter.

---
 .../mllib/api/python/PythonMLLibAPI.scala     | 16 ++--------
 python/pyspark/mllib/regression.py            | 32 +++++++++----------
 2 files changed, 18 insertions(+), 30 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index fbce412c773ef..b4f3b23199ac5 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -43,16 +43,6 @@ class PythonMLLibAPI extends Serializable {
   private val DENSE_MATRIX_MAGIC: Byte = 3
   private val LABELED_POINT_MAGIC: Byte = 4
 
-  /**
-   * Enumeration used to define the type of Regularizer
-   * used for linear methods.
-   */
-  object RegularizerType extends Serializable {
-    val L2 : Int = 0
-    val L1 : Int = 1
-    val NONE : Int = 2
-  }
-
   private[python] def deserializeDoubleVector(bytes: Array[Byte], offset: Int = 0): Vector = {
     require(bytes.length - offset >= 5, "Byte array too short")
     val magic = bytes(offset)
@@ -259,7 +249,7 @@ class PythonMLLibAPI extends Serializable {
       numIterations: Int,
       stepSize: Double,
       regParam: Double,
-      regType: Int,
+      regType: String,
       intercept: Boolean,
       miniBatchFraction: Double,
       initialWeightsBA: Array[Byte]): java.util.List[java.lang.Object] = {
@@ -269,9 +259,9 @@ class PythonMLLibAPI extends Serializable {
       setNumIterations(numIterations).
       setRegParam(regParam).
       setStepSize(stepSize)
-    if (regType == RegularizerType.L2)
+    if (regType == "SquaredUpdater")
       lrAlg.optimizer.setUpdater(new SquaredL2Updater)
-    else if (regType == RegularizerType.L1)
+    else if (regType == "L1Updater")
       lrAlg.optimizer.setUpdater(new L1Updater)
     trainRegressionModel(
       (data, initialWeights) =>
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index 644ae849d2033..e14952f67f5e7 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -109,33 +109,31 @@ class LinearRegressionModel(LinearRegressionModelBase):
     True
     """
 
-class RegularizerType(object):
-    L2 = 0
-    L1 = 1
-    NONE = 2
-
 class LinearRegressionWithSGD(object):
     @classmethod
     def train(cls, data, iterations=100, step=1.0, regParam=1.0, regType=None,
               intercept=False, miniBatchFraction=1.0, initialWeights=None):
-        """Train a linear regression model on the given data."""
+        """Train a linear regression model on the given data. The 'regType' parameter can take
+           one from the following string values: "L1Updater" for invoking the lasso regularizer,
+           "SquaredUpdater" for invoking the ridge regularizer or "NONE" for not using a
+           regularizer at all. The user can determine the regularizer parameter by setting the
+           appropriate value to variable 'regParam' (by default is set to 1.0)."""
         sc = data.context
         if regType is None:
             train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGD(
-                d._jrdd, iterations, step, regParam, sc._jvm.PythonMLLibAPI().RegularizerType().NONE(),
-                intercept, miniBatchFraction, i)
-        elif regType == RegularizerType.L2:
+                d._jrdd, iterations, step, regParam, "NONE", intercept, miniBatchFraction, i)
+        elif regType == "SquaredUpdater":
             train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGD(
-                d._jrdd, iterations, step, regParam, sc._jvm.PythonMLLibAPI().RegularizerType().L2(),
-                intercept, miniBatchFraction, i)
-        elif regType == RegularizerType.L1:
+                d._jrdd, iterations, step, regParam, regType, intercept, miniBatchFraction, i)
+        elif regType == "L1Updater":
             train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGD(
-                d._jrdd, iterations, step, regParam, sc._jvm.PythonMLLibAPI().RegularizerType().L1(),
-                intercept, miniBatchFraction, i)
-        elif regType == RegularizerType.NONE:
+                d._jrdd, iterations, step, regParam, regType, intercept, miniBatchFraction, i)
+        elif regType == "NONE":
             train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGD(
-                d._jrdd, iterations, step, regParam, sc._jvm.PythonMLLibAPI().RegularizerType().NONE(),
-                intercept, miniBatchFraction, i)
+                d._jrdd, iterations, step, regParam, regType, intercept, miniBatchFraction, i)
+        else:
+            raise ValueError("Invalid value for 'regType' parameter. Can only be initialized " +
+                             "using the following string values [L1Updater, SquaredUpdater, NONE].")
         return _regression_train_wrapper(sc, train_f, LinearRegressionModel, data, initialWeights)
 
 class LassoModel(LinearRegressionModelBase):

From ec50ee90dd02d62e31fa600d3d76dd4db01b695e Mon Sep 17 00:00:00 2001
From: Michael Giannakopoulos <miccagiann@gmail.com>
Date: Wed, 30 Jul 2014 20:55:10 -0400
Subject: [PATCH 4/9] Shorten the if-elif-else statement in regression.py file

---
 python/pyspark/mllib/regression.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index e14952f67f5e7..dbf0600f1db8b 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -122,13 +122,7 @@ def train(cls, data, iterations=100, step=1.0, regParam=1.0, regType=None,
         if regType is None:
             train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGD(
                 d._jrdd, iterations, step, regParam, "NONE", intercept, miniBatchFraction, i)
-        elif regType == "SquaredUpdater":
-            train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGD(
-                d._jrdd, iterations, step, regParam, regType, intercept, miniBatchFraction, i)
-        elif regType == "L1Updater":
-            train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGD(
-                d._jrdd, iterations, step, regParam, regType, intercept, miniBatchFraction, i)
-        elif regType == "NONE":
+        elif regType == "SquaredUpdater" or regType == "L1Updater" or regType == "NONE":
             train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGD(
                 d._jrdd, iterations, step, regParam, regType, intercept, miniBatchFraction, i)
         else:

From 638be479c474dbb91cdbffdf0f5e8568877a9288 Mon Sep 17 00:00:00 2001
From: Michael Giannakopoulos <miccagiann@gmail.com>
Date: Wed, 30 Jul 2014 23:16:07 -0400
Subject: [PATCH 5/9] Modified code to comply with code standards.

---
 .../mllib/api/python/PythonMLLibAPI.scala     | 12 +++----
 python/pyspark/mllib/regression.py            | 34 ++++++++++++++-----
 2 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index b4f3b23199ac5..0ab20777e9b7a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -255,13 +255,13 @@ class PythonMLLibAPI extends Serializable {
       initialWeightsBA: Array[Byte]): java.util.List[java.lang.Object] = {
     val lrAlg = new LinearRegressionWithSGD()
     lrAlg.setIntercept(intercept)
-    lrAlg.optimizer.
-      setNumIterations(numIterations).
-      setRegParam(regParam).
-      setStepSize(stepSize)
-    if (regType == "SquaredUpdater")
+    lrAlg.optimizer
+      .setNumIterations(numIterations)
+      .setRegParam(regParam)
+      .setStepSize(stepSize)
+    if (regType == "l2")
       lrAlg.optimizer.setUpdater(new SquaredL2Updater)
-    else if (regType == "L1Updater")
+    else if (regType == "l1")
       lrAlg.optimizer.setUpdater(new L1Updater)
     trainRegressionModel(
       (data, initialWeights) =>
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index dbf0600f1db8b..a4a13f214ffe8 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -113,21 +113,39 @@ class LinearRegressionWithSGD(object):
     @classmethod
     def train(cls, data, iterations=100, step=1.0, regParam=1.0, regType=None,
               intercept=False, miniBatchFraction=1.0, initialWeights=None):
-        """Train a linear regression model on the given data. The 'regType' parameter can take
-           one from the following string values: "L1Updater" for invoking the lasso regularizer,
-           "SquaredUpdater" for invoking the ridge regularizer or "NONE" for not using a
-           regularizer at all. The user can determine the regularizer parameter by setting the
-           appropriate value to variable 'regParam' (by default is set to 1.0)."""
+        """
+        Train a linear regression model on the given data.
+
+        @param data:              The training data.
+        @param iterations:        The number of iterations (default: 100).
+        @param step:              The step parameter used in SGD
+                                  (default: 1.0).
+        @param regParam:          The regularizer parameter (default: 1.0).
+        @param regType:           The type of regularizer used for training
+                                  our model.
+                                  Allowed values: "l1" for using L1Updater,
+                                                  "l2" for using
+                                                       SquaredL2Updater,
+                                                  "none" for no regularizer.
+                                  (default: None)
+        @param intercept:         Boolean parameter which indicates the use
+                                  or not of the augmented representation for
+                                  training data (i.e. whether bias features
+                                  are activated or not).
+        @param miniBatchFraction: Fraction of data to be used for each SGD
+                                  iteration.
+        @param initialWeights:    The initial weights (default: None).
+        """
         sc = data.context
         if regType is None:
             train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGD(
-                d._jrdd, iterations, step, regParam, "NONE", intercept, miniBatchFraction, i)
-        elif regType == "SquaredUpdater" or regType == "L1Updater" or regType == "NONE":
+                d._jrdd, iterations, step, regParam, "none", intercept, miniBatchFraction, i)
+        elif regType == "l2" or regType == "l1" or regType == "none":
             train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGD(
                 d._jrdd, iterations, step, regParam, regType, intercept, miniBatchFraction, i)
         else:
             raise ValueError("Invalid value for 'regType' parameter. Can only be initialized " +
-                             "using the following string values [L1Updater, SquaredUpdater, NONE].")
+                             "using the following string values: [l1, l2, none].")
         return _regression_train_wrapper(sc, train_f, LinearRegressionModel, data, initialWeights)
 
 class LassoModel(LinearRegressionModelBase):

From 8eba9c57ba6d295e6f465ea08f862624a87afbac Mon Sep 17 00:00:00 2001
From: Michael Giannakopoulos <miccagiann@gmail.com>
Date: Fri, 1 Aug 2014 19:13:46 -0400
Subject: [PATCH 6/9] Change function signatures. Exception is now thrown from
 the Scala component and not from the Python one.

---
 .../mllib/api/python/PythonMLLibAPI.scala     |  9 ++++---
 python/pyspark/mllib/regression.py            | 24 ++++++++-----------
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 0ab20777e9b7a..729d639c86e7e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -248,11 +248,11 @@ class PythonMLLibAPI extends Serializable {
       dataBytesJRDD: JavaRDD[Array[Byte]],
       numIterations: Int,
       stepSize: Double,
+      miniBatchFraction: Double,
+      initialWeightsBA: Array[Byte], 
       regParam: Double,
       regType: String,
-      intercept: Boolean,
-      miniBatchFraction: Double,
-      initialWeightsBA: Array[Byte]): java.util.List[java.lang.Object] = {
+      intercept: Boolean): java.util.List[java.lang.Object] = {
     val lrAlg = new LinearRegressionWithSGD()
     lrAlg.setIntercept(intercept)
     lrAlg.optimizer
@@ -263,6 +263,9 @@ class PythonMLLibAPI extends Serializable {
       lrAlg.optimizer.setUpdater(new SquaredL2Updater)
     else if (regType == "l1")
       lrAlg.optimizer.setUpdater(new L1Updater)
+    else if (regType != "none")
+      throw new java.lang.IllegalArgumentException("Invalid value for 'regType' parameter."
+        + "Can only be initialized using the following string values: [l1, l2, none].")
     trainRegressionModel(
       (data, initialWeights) =>
         lrAlg.run(data, initialWeights),
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index a4a13f214ffe8..32a50d367a9d2 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -111,8 +111,8 @@ class LinearRegressionModel(LinearRegressionModelBase):
 
 class LinearRegressionWithSGD(object):
     @classmethod
-    def train(cls, data, iterations=100, step=1.0, regParam=1.0, regType=None,
-              intercept=False, miniBatchFraction=1.0, initialWeights=None):
+    def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
+              initialWeights=None, regParam=1.0, regType=None, intercept=False):
         """
         Train a linear regression model on the given data.
 
@@ -120,6 +120,9 @@ def train(cls, data, iterations=100, step=1.0, regParam=1.0, regType=None,
         @param iterations:        The number of iterations (default: 100).
         @param step:              The step parameter used in SGD
                                   (default: 1.0).
+        @param miniBatchFraction: Fraction of data to be used for each SGD
+                                  iteration.
+        @param initialWeights:    The initial weights (default: None).
         @param regParam:          The regularizer parameter (default: 1.0).
         @param regType:           The type of regularizer used for training
                                   our model.
@@ -127,27 +130,20 @@ def train(cls, data, iterations=100, step=1.0, regParam=1.0, regType=None,
                                                   "l2" for using
                                                        SquaredL2Updater,
                                                   "none" for no regularizer.
-                                  (default: None)
+                                  (default: "none")
         @param intercept:         Boolean parameter which indicates the use
                                   or not of the augmented representation for
                                   training data (i.e. whether bias features
                                   are activated or not).
-        @param miniBatchFraction: Fraction of data to be used for each SGD
-                                  iteration.
-        @param initialWeights:    The initial weights (default: None).
         """
         sc = data.context
         if regType is None:
-            train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGD(
-                d._jrdd, iterations, step, regParam, "none", intercept, miniBatchFraction, i)
-        elif regType == "l2" or regType == "l1" or regType == "none":
-            train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGD(
-                d._jrdd, iterations, step, regParam, regType, intercept, miniBatchFraction, i)
-        else:
-            raise ValueError("Invalid value for 'regType' parameter. Can only be initialized " +
-                             "using the following string values: [l1, l2, none].")
+            regType = "none"
+        train_f = lambda d, i: sc._jvm.PythonMLLibAPI().trainLinearRegressionModelWithSGD(
+            d._jrdd, iterations, step, miniBatchFraction, i, regParam, regType, intercept)
         return _regression_train_wrapper(sc, train_f, LinearRegressionModel, data, initialWeights)
 
+
 class LassoModel(LinearRegressionModelBase):
     """A linear regression model derived from a least-squares fit with an
     l_1 penalty term.

From 44e6ff0ab39d3937a0553116acabe9ea3ad6107d Mon Sep 17 00:00:00 2001
From: Michael Giannakopoulos <miccagiann@gmail.com>
Date: Fri, 1 Aug 2014 19:19:04 -0400
Subject: [PATCH 7/9] Adding a blank line before python class
 LinearRegressionWithSGD.

---
 python/pyspark/mllib/regression.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index 32a50d367a9d2..041b119269427 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -109,6 +109,7 @@ class LinearRegressionModel(LinearRegressionModelBase):
     True
     """
 
+
 class LinearRegressionWithSGD(object):
     @classmethod
     def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,

From fed8eaa95202e99ab075396bde1bf4cc56c474e7 Mon Sep 17 00:00:00 2001
From: Michael Giannakopoulos <miccagiann@gmail.com>
Date: Fri, 1 Aug 2014 19:36:54 -0400
Subject: [PATCH 8/9] Adding a space in the message related to the
 IllegalArgumentException.

---
 .../org/apache/spark/mllib/api/python/PythonMLLibAPI.scala      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 729d639c86e7e..785ccef51d2a6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -265,7 +265,7 @@ class PythonMLLibAPI extends Serializable {
       lrAlg.optimizer.setUpdater(new L1Updater)
     else if (regType != "none")
       throw new java.lang.IllegalArgumentException("Invalid value for 'regType' parameter."
-        + "Can only be initialized using the following string values: [l1, l2, none].")
+        + " Can only be initialized using the following string values: [l1, l2, none].")
     trainRegressionModel(
       (data, initialWeights) =>
         lrAlg.run(data, initialWeights),

From 8dcb888048d1b7879f4a253ec58fee99829c7e37 Mon Sep 17 00:00:00 2001
From: Michael Giannakopoulos <miccagiann@gmail.com>
Date: Fri, 1 Aug 2014 21:03:45 -0400
Subject: [PATCH 9/9] Putting the if/else if statements in brackets.

---
 .../org/apache/spark/mllib/api/python/PythonMLLibAPI.scala | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 785ccef51d2a6..82a993d03119b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -259,13 +259,14 @@ class PythonMLLibAPI extends Serializable {
       .setNumIterations(numIterations)
       .setRegParam(regParam)
       .setStepSize(stepSize)
-    if (regType == "l2")
+    if (regType == "l2") {
       lrAlg.optimizer.setUpdater(new SquaredL2Updater)
-    else if (regType == "l1")
+    } else if (regType == "l1") {
       lrAlg.optimizer.setUpdater(new L1Updater)
-    else if (regType != "none")
+    } else if (regType != "none") {
       throw new java.lang.IllegalArgumentException("Invalid value for 'regType' parameter."
         + " Can only be initialized using the following string values: [l1, l2, none].")
+    }
     trainRegressionModel(
       (data, initialWeights) =>
         lrAlg.run(data, initialWeights),