Skip to content

Commit 70d1da9

Browse files
committed
Add Python API for MultilayerPerceptronClassifier and fix a bug: make the Scala model's `layers` and `weights` constructor arguments public `val`s so the new Python wrapper can read them
1 parent 3ca995b commit 70d1da9

File tree

2 files changed

+121
-2
lines changed

2 files changed

+121
-2
lines changed

mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,8 @@ class MultilayerPerceptronClassifier(override val uid: String)
172172
@Experimental
173173
class MultilayerPerceptronClassifierModel private[ml] (
174174
override val uid: String,
175-
layers: Array[Int],
176-
weights: Vector)
175+
val layers: Array[Int],
176+
val weights: Vector)
177177
extends PredictionModel[Vector, MultilayerPerceptronClassifierModel]
178178
with Serializable {
179179

python/pyspark/ml/classification.py

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -774,6 +774,125 @@ def theta(self):
774774
return self._call_java("theta")
775775

776776

777+
@inherit_doc
class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
                                     HasMaxIter, HasTol, HasSeed):
    """
    Classifier trainer based on the Multilayer Perceptron.
    Each layer has sigmoid activation function, output layer has softmax.
    Number of inputs has to be equal to the size of feature vectors.
    Number of outputs has to be equal to the total number of labels.

    >>> from pyspark.sql import Row
    >>> from pyspark.mllib.linalg import Vectors
    >>> df = sc.parallelize([
    ...     Row(label=0.0, features=Vectors.dense([0.0, 0.0])),
    ...     Row(label=1.0, features=Vectors.dense([0.0, 1.0])),
    ...     Row(label=1.0, features=Vectors.dense([1.0, 0.0])),
    ...     Row(label=0.0, features=Vectors.dense([1.0, 1.0]))]).toDF()
    >>> layers = [2, 5, 2]
    >>> lr = MultilayerPerceptronClassifier(maxIter=100, layers=layers, blockSize=1, seed=11)
    >>> model = lr.fit(df)
    >>> test0 = sc.parallelize([Row(features=Vectors.dense([1.0, 0.0]))]).toDF()
    >>> model.transform(test0).head().prediction
    1.0
    >>> test1 = sc.parallelize([Row(features=Vectors.dense([0.0, 0.0]))]).toDF()
    >>> model.transform(test1).head().prediction
    0.0
    """

    # Class-level Params are placeholders bound to Params._dummy(); they exist
    # only so the params show up in the generated API doc.  The real,
    # instance-bound Param objects are created in __init__ below.
    layers = Param(Params._dummy(), "layers", "Sizes of layers from input layer to output layer " +
                   "E.g., Array(780, 100, 10) means 780 inputs, one hidden layer with 100 " +
                   "neurons and output layer of 10 neurons, default is [1, 1].")
    blockSize = Param(Params._dummy(), "blockSize", "Block size for stacking input data in " +
                      "matrices. Data is stacked within partitions. If block size is more than " +
                      "remaining data in a partition then it is adjusted to the size of this " +
                      "data. Recommended size is between 10 and 1000, default is 128.")

    @keyword_only
    def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
                 maxIter=100, tol=1e-4, seed=None, layers=[1, 1], blockSize=128):
        """
        __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
                 maxIter=100, tol=1e-4, seed=None, layers=[1, 1], blockSize=128)
        """
        # NOTE(review): layers=[1, 1] is a mutable default argument; it is only
        # ever read (forwarded to _set / _setDefault), never mutated, so it is
        # safe, but callers should not rely on identity of the default list.
        super(MultilayerPerceptronClassifier, self).__init__()
        # Create the JVM-side estimator this Python wrapper delegates to.
        self._java_obj = self._new_java_obj(
            "org.apache.spark.ml.classification.MultilayerPerceptronClassifier", self.uid)
        # Instance-bound Params shadow the class-level placeholders above.
        self.layers = Param(self, "layers", "Sizes of layers from input layer to output layer " +
                            "E.g., Array(780, 100, 10) means 780 inputs, one hidden layer with " +
                            "100 neurons and output layer of 10 neurons, default is [1, 1].")
        self.blockSize = Param(self, "blockSize", "Block size for stacking input data in " +
                               "matrices. Data is stacked within partitions. If block size is " +
                               "more than remaining data in a partition then it is adjusted to " +
                               "the size of this data. Recommended size is between 10 and 1000, " +
                               "default is 128.")
        # Defaults must be registered before setParams so explicitly-passed
        # kwargs (captured by @keyword_only into _input_kwargs) override them.
        self._setDefault(maxIter=100, tol=1E-4, layers=[1, 1], blockSize=128)
        kwargs = self.__init__._input_kwargs
        self.setParams(**kwargs)

    @keyword_only
    def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
                  maxIter=100, tol=1e-4, seed=None, layers=[1, 1], blockSize=128):
        """
        setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
                  maxIter=100, tol=1e-4, seed=None, layers=[1, 1], blockSize=128)
        Sets params for MultilayerPerceptronClassifier.
        """
        # Only the kwargs the caller actually supplied are set; the rest keep
        # the defaults registered in __init__.
        kwargs = self.setParams._input_kwargs
        return self._set(**kwargs)

    def _create_model(self, java_model):
        # Called by JavaEstimator.fit to wrap the fitted JVM model.
        return MultilayerPerceptronClassifierModel(java_model)

    def setLayers(self, value):
        """
        Sets the value of :py:attr:`layers`.
        """
        self._paramMap[self.layers] = value
        return self

    def getLayers(self):
        """
        Gets the value of layers or its default value.
        """
        return self.getOrDefault(self.layers)

    def setBlockSize(self, value):
        """
        Sets the value of :py:attr:`blockSize`.
        """
        self._paramMap[self.blockSize] = value
        return self

    def getBlockSize(self):
        """
        Gets the value of blockSize or its default value.
        """
        return self.getOrDefault(self.blockSize)
875+
876+
class MultilayerPerceptronClassifierModel(JavaModel):
    """
    Model fitted by MultilayerPerceptronClassifier.

    Wraps the JVM-side model; both properties below simply forward to it.
    """

    @property
    def weights(self):
        """
        vector of initial weights for the model that consists of the weights of layers.
        """
        # Delegates to the underlying Java model's weights accessor.
        return self._call_java("weights")

    @property
    def layers(self):
        """
        array of layer sizes including input and output layers.
        """
        # Delegates to the underlying Java model's layers accessor.
        return self._call_java("layers")
894+
895+
777896
if __name__ == "__main__":
778897
import doctest
779898
from pyspark.context import SparkContext

0 commit comments

Comments
 (0)