|
# Names exported by ``from <this module> import *``.
__all__ = [
    'LogisticRegression',
    'LogisticRegressionModel',
    'DecisionTreeClassifier',
    'DecisionTreeClassificationModel',
    'GBTClassifier',
    'GBTClassificationModel',
    'RandomForestClassifier',
    'RandomForestClassificationModel',
    'NaiveBayes',
    'NaiveBayesModel',
    'MultilayerPerceptronClassifier',
    'MultilayerPerceptronClassificationModel',
]
30 | 31 |
|
31 | 32 |
|
32 | 33 | @inherit_doc
|
@@ -755,6 +756,135 @@ def theta(self):
|
755 | 756 | return self._call_java("theta")
|
756 | 757 |
|
757 | 758 |
|
@inherit_doc
class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
                                     HasMaxIter, HasTol, HasSeed):
    """
    Classifier trainer based on the Multilayer Perceptron.
    Each layer has sigmoid activation function, output layer has softmax.
    Number of inputs has to be equal to the size of feature vectors.
    Number of outputs has to be equal to the total number of labels.

    >>> from pyspark.mllib.linalg import Vectors
    >>> df = sqlContext.createDataFrame([
    ...     (0.0, Vectors.dense([0.0, 0.0])),
    ...     (1.0, Vectors.dense([0.0, 1.0])),
    ...     (1.0, Vectors.dense([1.0, 0.0])),
    ...     (0.0, Vectors.dense([1.0, 1.0]))], ["label", "features"])
    >>> mlp = MultilayerPerceptronClassifier(maxIter=100, layers=[2, 5, 2], blockSize=1, seed=11)
    >>> model = mlp.fit(df)
    >>> model.layers
    [2, 5, 2]
    >>> model.weights.size
    27
    >>> testDF = sqlContext.createDataFrame([
    ...     (Vectors.dense([1.0, 0.0]),),
    ...     (Vectors.dense([0.0, 0.0]),)], ["features"])
    >>> model.transform(testDF).show()
    +---------+----------+
    | features|prediction|
    +---------+----------+
    |[1.0,0.0]|       1.0|
    |[0.0,0.0]|       0.0|
    +---------+----------+
    ...
    """

    # a placeholder to make it appear in the generated doc
    layers = Param(Params._dummy(), "layers", "Sizes of layers from input layer to output layer " +
                   "E.g., Array(780, 100, 10) means 780 inputs, one hidden layer with 100 " +
                   "neurons and output layer of 10 neurons, default is [1, 1].")
    blockSize = Param(Params._dummy(), "blockSize", "Block size for stacking input data in " +
                      "matrices. Data is stacked within partitions. If block size is more than " +
                      "remaining data in a partition then it is adjusted to the size of this " +
                      "data. Recommended size is between 10 and 1000, default is 128.")

    @keyword_only
    def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
                 maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128):
        """
        __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
                 maxIter=100, tol=1e-4, seed=None, layers=[1, 1], blockSize=128)

        Creates the backing Java estimator, registers the defaults
        (maxIter=100, tol=1E-4, layers=[1, 1], blockSize=128) and then applies
        the supplied keyword arguments through :py:meth:`setParams`.
        """
        super(MultilayerPerceptronClassifier, self).__init__()
        self._java_obj = self._new_java_obj(
            "org.apache.spark.ml.classification.MultilayerPerceptronClassifier", self.uid)
        # Instance-level params owned by ``self``; they shadow the class-level
        # placeholders, which exist only for the generated documentation.
        self.layers = Param(self, "layers", "Sizes of layers from input layer to output layer " +
                            "E.g., Array(780, 100, 10) means 780 inputs, one hidden layer with " +
                            "100 neurons and output layer of 10 neurons, default is [1, 1].")
        self.blockSize = Param(self, "blockSize", "Block size for stacking input data in " +
                               "matrices. Data is stacked within partitions. If block size is " +
                               "more than remaining data in a partition then it is adjusted to " +
                               "the size of this data. Recommended size is between 10 and 1000, " +
                               "default is 128.")
        self._setDefault(maxIter=100, tol=1E-4, layers=[1, 1], blockSize=128)
        kwargs = self.__init__._input_kwargs
        self.setParams(**kwargs)

    @keyword_only
    def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
                  maxIter=100, tol=1e-4, seed=None, layers=None, blockSize=128):
        """
        setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
                  maxIter=100, tol=1e-4, seed=None, layers=[1, 1], blockSize=128)
        Sets params for MultilayerPerceptronClassifier.

        When ``layers`` is omitted (or passed as None) the current value of
        :py:attr:`layers` is left untouched, so a later call such as
        ``setParams(maxIter=50)`` does not discard layers configured earlier.
        If layers was never set, :py:meth:`getLayers` returns the ``[1, 1]``
        default registered in ``__init__``.
        """
        # Work on a copy so the dict stored on the method is not mutated.
        kwargs = dict(self.setParams._input_kwargs)
        # None means "not specified": never store it, and never overwrite an
        # already-configured value with the default.
        if kwargs.get("layers") is None:
            kwargs.pop("layers", None)
        return self._set(**kwargs)

    def _create_model(self, java_model):
        """
        Wraps the given Java model in a MultilayerPerceptronClassificationModel.
        """
        return MultilayerPerceptronClassificationModel(java_model)

    def setLayers(self, value):
        """
        Sets the value of :py:attr:`layers`.
        """
        self._paramMap[self.layers] = value
        return self

    def getLayers(self):
        """
        Gets the value of layers or its default value.
        """
        return self.getOrDefault(self.layers)

    def setBlockSize(self, value):
        """
        Sets the value of :py:attr:`blockSize`.
        """
        self._paramMap[self.blockSize] = value
        return self

    def getBlockSize(self):
        """
        Gets the value of blockSize or its default value.
        """
        return self.getOrDefault(self.blockSize)
| 866 | + |
| 867 | + |
class MultilayerPerceptronClassificationModel(JavaModel):
    """
    Model fitted by MultilayerPerceptronClassifier.
    """

    @property
    def layers(self):
        """
        Layer sizes, input and output layers included, as reported by the
        Java model's ``javaLayers``.
        """
        layer_sizes = self._call_java("javaLayers")
        return layer_sizes

    @property
    def weights(self):
        """
        Vector holding the weights of the model's layers, as reported by the
        Java model's ``weights``.
        """
        weight_vector = self._call_java("weights")
        return weight_vector
| 886 | + |
| 887 | + |
758 | 888 | if __name__ == "__main__":
|
759 | 889 | import doctest
|
760 | 890 | from pyspark.context import SparkContext
|
|
0 commit comments