@@ -774,6 +774,125 @@ def theta(self):
774
774
return self ._call_java ("theta" )
775
775
776
776
777
@inherit_doc
class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
                                     HasMaxIter, HasTol, HasSeed):
    """
    Classifier trainer based on the Multilayer Perceptron.
    Each layer has sigmoid activation function, output layer has softmax.
    Number of inputs has to be equal to the size of feature vectors.
    Number of outputs has to be equal to the total number of labels.

    >>> from pyspark.sql import Row
    >>> from pyspark.mllib.linalg import Vectors
    >>> df = sc.parallelize([
    ...     Row(label=0.0, features=Vectors.dense([0.0, 0.0])),
    ...     Row(label=1.0, features=Vectors.dense([0.0, 1.0])),
    ...     Row(label=1.0, features=Vectors.dense([1.0, 0.0])),
    ...     Row(label=0.0, features=Vectors.dense([1.0, 1.0]))]).toDF()
    >>> layers = [2, 5, 2]
    >>> mlp = MultilayerPerceptronClassifier(maxIter=100, layers=layers, blockSize=1, seed=11)
    >>> model = mlp.fit(df)
    >>> test0 = sc.parallelize([Row(features=Vectors.dense([1.0, 0.0]))]).toDF()
    >>> model.transform(test0).head().prediction
    1.0
    >>> test1 = sc.parallelize([Row(features=Vectors.dense([0.0, 0.0]))]).toDF()
    >>> model.transform(test1).head().prediction
    0.0
    """

    # a placeholder to make it appear in the generated doc
    layers = Param(Params._dummy(), "layers",
                   "Sizes of layers from input layer to output layer "
                   "E.g., Array(780, 100, 10) means 780 inputs, one hidden layer with 100 "
                   "neurons and output layer of 10 neurons, default is [1, 1].")
    blockSize = Param(Params._dummy(), "blockSize",
                      "Block size for stacking input data in "
                      "matrices. Data is stacked within partitions. If block size is more than "
                      "remaining data in a partition then it is adjusted to the size of this "
                      "data. Recommended size is between 10 and 1000, default is 128.")

    @keyword_only
    def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
                 maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128):
        """
        __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
                 maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128)

        Creates the backing Java estimator, declares the instance-level
        ``layers`` and ``blockSize`` params, registers the defaults
        (maxIter=100, tol=1E-4, layers=[1, 1], blockSize=128) and then applies
        the keyword arguments captured by ``keyword_only``.

        ``layers=None`` in the signature only means "not supplied": a list
        literal is deliberately not used as a signature default. When layers
        is not given, :py:meth:`getLayers` falls back to the registered
        default [1, 1].
        """
        super(MultilayerPerceptronClassifier, self).__init__()
        self._java_obj = self._new_java_obj(
            "org.apache.spark.ml.classification.MultilayerPerceptronClassifier", self.uid)
        # Instance-level params shadow the class-level placeholders so that
        # each param is owned by this estimator instance.
        self.layers = Param(self, "layers",
                            "Sizes of layers from input layer to output layer "
                            "E.g., Array(780, 100, 10) means 780 inputs, one hidden layer with "
                            "100 neurons and output layer of 10 neurons, default is [1, 1].")
        self.blockSize = Param(self, "blockSize",
                               "Block size for stacking input data in "
                               "matrices. Data is stacked within partitions. If block size is "
                               "more than remaining data in a partition then it is adjusted to "
                               "the size of this data. Recommended size is between 10 and 1000, "
                               "default is 128.")
        # A fresh [1, 1] list is built on every construction, so instances
        # never share the default layers object.
        self._setDefault(maxIter=100, tol=1E-4, layers=[1, 1], blockSize=128)
        # NOTE(review): relies on keyword_only recording only the keyword
        # arguments the caller actually passed -- confirm against its definition.
        kwargs = self.__init__._input_kwargs
        self.setParams(**kwargs)

    @keyword_only
    def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
                  maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128):
        """
        setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
                  maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128)
        Sets params for MultilayerPerceptronClassifier.

        Forwards the keyword arguments captured by ``keyword_only`` to
        ``_set`` and returns its result.
        """
        kwargs = self.setParams._input_kwargs
        return self._set(**kwargs)

    def _create_model(self, java_model):
        """
        Wraps the given Java model in a
        :py:class:`MultilayerPerceptronClassifierModel`.
        """
        return MultilayerPerceptronClassifierModel(java_model)

    def setLayers(self, value):
        """
        Sets the value of :py:attr:`layers`.
        """
        # Go through _set, like setParams does, instead of touching the
        # param map directly.
        self._set(layers=value)
        return self

    def getLayers(self):
        """
        Gets the value of layers or its default value.
        """
        return self.getOrDefault(self.layers)

    def setBlockSize(self, value):
        """
        Sets the value of :py:attr:`blockSize`.
        """
        # Go through _set, like setParams does, instead of touching the
        # param map directly.
        self._set(blockSize=value)
        return self

    def getBlockSize(self):
        """
        Gets the value of blockSize or its default value.
        """
        return self.getOrDefault(self.blockSize)
874
+
875
+
876
class MultilayerPerceptronClassifierModel(JavaModel):
    """
    Model fitted by MultilayerPerceptronClassifier.

    Both properties are read from the wrapped Java model through
    ``_call_java``.
    """

    @property
    def layers(self):
        """
        array of layer sizes including input and output layers.
        """
        # Delegates to the Java model's "layers" member.
        java_layers = self._call_java("layers")
        return java_layers

    @property
    def weights(self):
        """
        vector of initial weights for the model that consists of the weights of layers.
        """
        # Delegates to the Java model's "weights" member.
        java_weights = self._call_java("weights")
        return java_weights
894
+
895
+
777
896
if __name__ == "__main__" :
778
897
import doctest
779
898
from pyspark .context import SparkContext
0 commit comments