18
18
package org .apache .spark .mllib .regression
19
19
20
20
import java .io .Serializable
21
+ import java .lang .{Double => JDouble }
21
22
import java .util .Arrays .binarySearch
22
23
23
24
import org .apache .spark .api .java .{JavaDoubleRDD , JavaRDD }
@@ -53,8 +54,9 @@ class IsotonicRegressionModel (
53
54
* @param testData Features to be labeled.
54
55
* @return Predicted labels.
55
56
*/
56
- def predict (testData : RDD [Double ]): RDD [Double ] =
57
+ def predict (testData : RDD [Double ]): RDD [Double ] = {
57
58
testData.map(predict)
59
+ }
58
60
59
61
/**
60
62
* Predict labels for provided features.
@@ -63,8 +65,9 @@ class IsotonicRegressionModel (
63
65
* @param testData Features to be labeled.
64
66
* @return Predicted labels.
65
67
*/
66
- def predict (testData : JavaDoubleRDD ): JavaDoubleRDD =
68
+ def predict (testData : JavaDoubleRDD ): JavaDoubleRDD = {
67
69
JavaDoubleRDD .fromRDD(predict(testData.rdd.asInstanceOf [RDD [Double ]]))
70
+ }
68
71
69
72
/**
70
73
* Predict a single label.
@@ -75,8 +78,8 @@ class IsotonicRegressionModel (
75
78
* If testData exactly matches a boundary then associated prediction is directly returned
76
79
* If testData is lower or higher than all boundaries
77
80
* then first or last prediction is returned respectively
78
- * If testData falls between two values in boundary then predictions is treated as piecewise
79
- * linear function and interpolated value is returned
81
+ * If testData falls between two values in boundary then the prediction is treated
82
+ * as a piecewise linear function and the interpolated value is returned
80
83
*/
81
84
def predict (testData : Double ): Double = {
82
85
@@ -88,8 +91,8 @@ class IsotonicRegressionModel (
88
91
89
92
val normalisedInsertIndex = - insertIndex - 1
90
93
91
- // Find if the index was lower than all values,
92
- // higher than all values, inbetween two values or exact match.
94
+ // Find if the index was lower than all values,
95
+ // higher than all values, in between two values or an exact match.
93
96
if (insertIndex == - 1 ) {
94
97
predictions.head
95
98
} else if (normalisedInsertIndex == boundaries.length){
@@ -121,37 +124,50 @@ class IsotonicRegressionModel (
121
124
* "An approach to parallelizing isotonic regression."
122
125
* Applied Mathematics and Parallel Computing. Physica-Verlag HD, 1996. 141-147.
123
126
*/
124
- class IsotonicRegression extends Serializable {
127
+ class IsotonicRegression private ( private var isotonic : Boolean ) extends Serializable {
125
128
126
129
/**
127
- * Run pool adjacent violators algorithm to obtain isotonic regression model.
130
+ * Constructs an IsotonicRegression instance with the default parameter isotonic = true
131
+ * @return New instance of IsotonicRegression
132
+ */
133
+ def this () = this (true )
134
+
135
+ /**
136
+ * Sets the isotonic parameter
137
+ * @param isotonic Isotonic (increasing) or antitonic (decreasing) sequence.
138
+ * @return The instance of IsotonicRegression
139
+ */
140
+ def setIsotonic (isotonic : Boolean ): this .type = {
141
+ this .isotonic = isotonic
142
+ this
143
+ }
144
+
145
+ /**
146
+ * Run IsotonicRegression algorithm to obtain isotonic regression model.
128
147
*
129
148
* @param input RDD of tuples (label, feature, weight) where label is dependent variable
130
149
* for which we calculate isotonic regression, feature is independent variable
131
150
* and weight represents number of measures with default 1.
132
151
*
133
- * @param isotonic Isotonic (increasing) or antitonic (decreasing) sequence.
134
152
* @return Isotonic regression model.
135
153
*/
136
- def run (
137
- input : RDD [(Double , Double , Double )],
138
- isotonic : Boolean ): IsotonicRegressionModel =
154
+ def run (input : RDD [(Double , Double , Double )]): IsotonicRegressionModel = {
139
155
createModel(parallelPoolAdjacentViolators(input, isotonic), isotonic)
156
+ }
140
157
141
- /**
158
+ /**
142
159
* Run pool adjacent violators algorithm to obtain isotonic regression model.
143
160
*
144
161
* @param input JavaRDD of tuples (label, feature, weight) where label is dependent variable
145
162
* for which we calculate isotonic regression, feature is independent variable
146
163
* and weight represents number of measures with default 1.
147
164
*
148
- * @param isotonic Isotonic (increasing) or antitonic (decreasing) sequence.
149
165
* @return Isotonic regression model.
150
166
*/
151
167
def run (
152
- input : JavaRDD [(java.lang. Double , java.lang. Double , java.lang. Double )],
153
- isotonic : Boolean ) : IsotonicRegressionModel =
154
- run(input.rdd. asInstanceOf [ RDD [( Double , Double , Double )]], isotonic)
168
+ input : JavaRDD [(JDouble , JDouble , JDouble )]) : IsotonicRegressionModel = {
169
+ run(input.rdd. asInstanceOf [ RDD [( Double , Double , Double )]])
170
+ }
155
171
156
172
/**
157
173
* Creates isotonic regression model with given parameters.
@@ -164,11 +180,7 @@ class IsotonicRegression extends Serializable {
164
180
protected def createModel (
165
181
predictions : Array [(Double , Double , Double )],
166
182
isotonic : Boolean ): IsotonicRegressionModel = {
167
-
168
- val labels = predictions.map(_._1)
169
- val features = predictions.map(_._2)
170
-
171
- new IsotonicRegressionModel (features, labels)
183
+ new IsotonicRegressionModel (predictions.map(_._2), predictions.map(_._1))
172
184
}
173
185
174
186
/**
@@ -249,4 +261,4 @@ class IsotonicRegression extends Serializable {
249
261
250
262
poolAdjacentViolators(parallelStepResult.collect(), isotonic)
251
263
}
252
- }
264
+ }
0 commit comments