@@ -20,6 +20,7 @@ package org.apache.spark.mllib.regression
20
20
import org .scalatest .{Matchers , FunSuite }
21
21
22
22
import org .apache .spark .mllib .util .MLlibTestSparkContext
23
+ import org .apache .spark .mllib .util .TestingUtils ._
23
24
24
25
class IsotonicRegressionSuite extends FunSuite with MLlibTestSparkContext with Matchers {
25
26
@@ -28,15 +29,13 @@ class IsotonicRegressionSuite extends FunSuite with MLlibTestSparkContext with M
28
29
}
29
30
30
31
private def generateIsotonicInput (labels : Seq [Double ]): Seq [(Double , Double , Double )] = {
31
- labels.zip( 1 to labels.size).map(point => (point._1, point._2 .toDouble, 1d ))
32
+ Seq .tabulate( labels.size)(i => (labels(i), i .toDouble, 1d ))
32
33
}
33
34
34
35
private def generateIsotonicInput (
35
36
labels : Seq [Double ],
36
37
weights : Seq [Double ]): Seq [(Double , Double , Double )] = {
37
- labels.zip(1 to labels.size)
38
- .zip(weights)
39
- .map(point => (point._1._1, point._1._2.toDouble, point._2))
38
+ Seq .tabulate(labels.size)(i => (labels(i), i.toDouble, weights(i)))
40
39
}
41
40
42
41
private def runIsotonicRegression (
@@ -54,9 +53,24 @@ class IsotonicRegressionSuite extends FunSuite with MLlibTestSparkContext with M
54
53
}
55
54
56
55
test(" increasing isotonic regression" ) {
57
- val model = runIsotonicRegression(Seq (1 , 2 , 3 , 3 , 1 , 6 , 17 , 16 , 17 , 18 ), true )
56
+ /*
57
+ The following result can be reproduced with sklearn.
58
58
59
- assert(model.predictions === Array (1 , 2 , 7d / 3 , 7d / 3 , 7d / 3 , 6 , 16.5 , 16.5 , 17 , 18 ))
59
+ > from sklearn.isotonic import IsotonicRegression
60
+ > x = range(9)
61
+ > y = [1, 2, 3, 1, 6, 17, 16, 17, 18]
62
+ > ir = IsotonicRegression().fit(x, y)
63
+ > print ir.predict(x)
64
+
65
+ array([ 1. , 2. , 2. , 2. , 6. , 16.5, 16.5, 17. , 18. ])
66
+ */
67
+ val model = runIsotonicRegression(Seq (1 , 2 , 3 , 1 , 6 , 17 , 16 , 17 , 18 ), true )
68
+
69
+ assert(Array .tabulate(9 )(x => model.predict(x)) === Array (1 , 2 , 2 , 2 , 6 , 16.5 , 16.5 , 17 , 18 ))
70
+
71
+ assert(model.boundaries === Array (0 , 1 , 3 , 4 , 5 , 6 , 7 , 8 ))
72
+ assert(model.predictions === Array (1 , 2 , 2 , 6 , 16.5 , 16.5 , 17.0 , 18.0 ))
73
+ assert(model.isotonic)
60
74
}
61
75
62
76
test(" isotonic regression with size 0" ) {
@@ -80,74 +94,82 @@ class IsotonicRegressionSuite extends FunSuite with MLlibTestSparkContext with M
80
94
test(" isotonic regression strictly decreasing sequence" ) {
81
95
val model = runIsotonicRegression(Seq (5 , 4 , 3 , 2 , 1 ), true )
82
96
83
- assert(model.predictions === Array (3 , 3 , 3 , 3 , 3 ))
97
+ assert(model.boundaries === Array (0 , 4 ))
98
+ assert(model.predictions === Array (3 , 3 ))
84
99
}
85
100
86
101
test(" isotonic regression with last element violating monotonicity" ) {
87
102
val model = runIsotonicRegression(Seq (1 , 2 , 3 , 4 , 2 ), true )
88
103
89
- assert(model.predictions === Array (1 , 2 , 3 , 3 , 3 ))
104
+ assert(model.boundaries === Array (0 , 1 , 2 , 4 ))
105
+ assert(model.predictions === Array (1 , 2 , 3 , 3 ))
90
106
}
91
107
92
108
test(" isotonic regression with first element violating monotonicity" ) {
93
109
val model = runIsotonicRegression(Seq (4 , 2 , 3 , 4 , 5 ), true )
94
110
95
- assert(model.predictions === Array (3 , 3 , 3 , 4 , 5 ))
111
+ assert(model.boundaries === Array (0 , 2 , 3 , 4 ))
112
+ assert(model.predictions === Array (3 , 3 , 4 , 5 ))
96
113
}
97
114
98
115
test(" isotonic regression with negative labels" ) {
99
116
val model = runIsotonicRegression(Seq (- 1 , - 2 , 0 , 1 , - 1 ), true )
100
117
101
- assert(model.predictions === Array (- 1.5 , - 1.5 , 0 , 0 , 0 ))
118
+ assert(model.boundaries === Array (0 , 1 , 2 , 4 ))
119
+ assert(model.predictions === Array (- 1.5 , - 1.5 , 0 , 0 ))
102
120
}
103
121
104
122
test(" isotonic regression with unordered input" ) {
105
- val trainRDD = sc.parallelize(generateIsotonicInput(Seq (1 , 2 , 3 , 4 , 5 )).reverse).cache()
106
- val model = new IsotonicRegression ().run(trainRDD)
123
+ val trainRDD = sc.parallelize(generateIsotonicInput(Seq (1 , 2 , 3 , 4 , 5 )).reverse, 2 ).cache()
107
124
125
+ val model = new IsotonicRegression ().run(trainRDD)
108
126
assert(model.predictions === Array (1 , 2 , 3 , 4 , 5 ))
109
127
}
110
128
111
129
test(" weighted isotonic regression" ) {
112
130
val model = runIsotonicRegression(Seq (1 , 2 , 3 , 4 , 2 ), Seq (1 , 1 , 1 , 1 , 2 ), true )
113
131
114
- assert(model.predictions === Array (1 , 2 , 2.75 , 2.75 ,2.75 ))
132
+ assert(model.boundaries === Array (0 , 1 , 2 , 4 ))
133
+ assert(model.predictions === Array (1 , 2 , 2.75 , 2.75 ))
115
134
}
116
135
117
136
test(" weighted isotonic regression with weights lower than 1" ) {
118
137
val model = runIsotonicRegression(Seq (1 , 2 , 3 , 2 , 1 ), Seq (1 , 1 , 1 , 0.1 , 0.1 ), true )
119
138
120
- assert(model.predictions.map(round) === Array (1 , 2 , 3.3 / 1.2 , 3.3 / 1.2 , 3.3 / 1.2 ))
139
+ assert(model.boundaries === Array (0 , 1 , 2 , 4 ))
140
+ assert(model.predictions.map(round) === Array (1 , 2 , 3.3 / 1.2 , 3.3 / 1.2 ))
121
141
}
122
142
123
143
test(" weighted isotonic regression with negative weights" ) {
124
144
val model = runIsotonicRegression(Seq (1 , 2 , 3 , 2 , 1 ), Seq (- 1 , 1 , - 3 , 1 , - 5 ), true )
125
145
126
- assert(model.predictions === Array (1.0 , 10.0 / 6 , 10.0 / 6 , 10.0 / 6 , 10.0 / 6 ))
146
+ assert(model.boundaries === Array (0.0 , 1.0 , 4.0 ))
147
+ assert(model.predictions === Array (1.0 , 10.0 / 6 , 10.0 / 6 ))
127
148
}
128
149
129
150
test(" weighted isotonic regression with zero weights" ) {
130
151
val model = runIsotonicRegression(Seq [Double ](1 , 2 , 3 , 2 , 1 ), Seq [Double ](0 , 0 , 0 , 1 , 0 ), true )
131
152
132
- assert(model.predictions === Array (1 , 2 , 2 , 2 , 2 ))
153
+ assert(model.boundaries === Array (0.0 , 1.0 , 4.0 ))
154
+ assert(model.predictions === Array (1 , 2 , 2 ))
133
155
}
134
156
135
157
test(" isotonic regression prediction" ) {
136
158
val model = runIsotonicRegression(Seq (1 , 2 , 7 , 1 , 2 ), true )
137
159
160
+ assert(model.predict(- 2 ) === 1 )
138
161
assert(model.predict(- 1 ) === 1 )
139
- assert(model.predict(0 ) === 1 )
140
- assert(model.predict(1.5 ) === 1.5 )
141
- assert(model.predict(1.75 ) === 1.75 )
142
- assert(model.predict(2 ) === 2 )
143
- assert(model.predict(3 ) === 10d / 3 )
144
- assert(model.predict(10 ) === 10d / 3 )
162
+ assert(model.predict(0.5 ) === 1.5 )
163
+ assert(model.predict(0.75 ) === 1.75 )
164
+ assert(model.predict(1 ) === 2 )
165
+ assert(model.predict(2 ) === 10d / 3 )
166
+ assert(model.predict(9 ) === 10d / 3 )
145
167
}
146
168
147
169
test(" isotonic regression prediction with duplicate features" ) {
148
170
val trainRDD = sc.parallelize(
149
171
Seq [(Double , Double , Double )](
150
- (2 , 1 , 1 ), (1 , 1 , 1 ), (4 , 2 , 1 ), (2 , 2 , 1 ), (6 , 3 , 1 ), (5 , 3 , 1 ))).cache()
172
+ (2 , 1 , 1 ), (1 , 1 , 1 ), (4 , 2 , 1 ), (2 , 2 , 1 ), (6 , 3 , 1 ), (5 , 3 , 1 )), 2 ).cache()
151
173
val model = new IsotonicRegression ().run(trainRDD)
152
174
153
175
assert(model.predict(0 ) === 1 )
@@ -159,7 +181,7 @@ class IsotonicRegressionSuite extends FunSuite with MLlibTestSparkContext with M
159
181
test(" antitonic regression prediction with duplicate features" ) {
160
182
val trainRDD = sc.parallelize(
161
183
Seq [(Double , Double , Double )](
162
- (5 , 1 , 1 ), (6 , 1 , 1 ), (2 , 2 , 1 ), (4 , 2 , 1 ), (1 , 3 , 1 ), (2 , 3 , 1 ))).cache()
184
+ (5 , 1 , 1 ), (6 , 1 , 1 ), (2 , 2 , 1 ), (4 , 2 , 1 ), (1 , 3 , 1 ), (2 , 3 , 1 )), 2 ).cache()
163
185
val model = new IsotonicRegression ().setIsotonic(false ).run(trainRDD)
164
186
165
187
assert(model.predict(0 ) === 6 )
@@ -170,20 +192,50 @@ class IsotonicRegressionSuite extends FunSuite with MLlibTestSparkContext with M
170
192
171
193
test(" isotonic regression RDD prediction" ) {
172
194
val model = runIsotonicRegression(Seq (1 , 2 , 7 , 1 , 2 ), true )
173
- val testRDD = sc.parallelize(List (- 1.0 , 0.0 , 1.5 , 1.75 , 2.0 , 3.0 , 10.0 )).cache()
174
195
175
- assert(model.predict(testRDD).collect() === Array (1 , 1 , 1.5 , 1.75 , 2 , 10.0 / 3 , 10.0 / 3 ))
196
+ val testRDD = sc.parallelize(List (- 2.0 , - 1.0 , 0.5 , 0.75 , 1.0 , 2.0 , 9.0 ), 2 ).cache()
197
+ val predictions = testRDD.map(x => (x, model.predict(x))).collect().sortBy(_._1).map(_._2)
198
+ assert(predictions === Array (1 , 1 , 1.5 , 1.75 , 2 , 10.0 / 3 , 10.0 / 3 ))
176
199
}
177
200
178
201
test(" antitonic regression prediction" ) {
179
202
val model = runIsotonicRegression(Seq (7 , 5 , 3 , 5 , 1 ), false )
180
203
204
+ assert(model.predict(- 2 ) === 7 )
181
205
assert(model.predict(- 1 ) === 7 )
182
- assert(model.predict(0 ) === 7 )
183
- assert(model.predict(1.5 ) === 6 )
184
- assert(model.predict(1.75 ) === 5.5 )
185
- assert(model.predict(2 ) === 5 )
186
- assert(model.predict(3 ) === 4 )
187
- assert(model.predict(10 ) === 1 )
188
- }
189
- }
206
+ assert(model.predict(0.5 ) === 6 )
207
+ assert(model.predict(0.75 ) === 5.5 )
208
+ assert(model.predict(1 ) === 5 )
209
+ assert(model.predict(2 ) === 4 )
210
+ assert(model.predict(9 ) === 1 )
211
+ }
212
+
213
+ test(" model construction" ) {
214
+ val model = new IsotonicRegressionModel (Array (0.0 , 1.0 ), Array (1.0 , 2.0 ), isotonic = true )
215
+ assert(model.predict(- 0.5 ) === 1.0 )
216
+ assert(model.predict(0.0 ) === 1.0 )
217
+ assert(model.predict(0.5 ) ~== 1.5 absTol 1e-14 )
218
+ assert(model.predict(1.0 ) === 2.0 )
219
+ assert(model.predict(1.5 ) === 2.0 )
220
+
221
+ intercept[IllegalArgumentException ] {
222
+ // different array sizes.
223
+ new IsotonicRegressionModel (Array (0.0 , 1.0 ), Array (1.0 ), isotonic = true )
224
+ }
225
+
226
+ intercept[IllegalArgumentException ] {
227
+ // unordered boundaries
228
+ new IsotonicRegressionModel (Array (1.0 , 0.0 ), Array (1.0 , 2.0 ), isotonic = true )
229
+ }
230
+
231
+ intercept[IllegalArgumentException ] {
232
+ // unordered predictions (isotonic)
233
+ new IsotonicRegressionModel (Array (0.0 , 1.0 ), Array (2.0 , 1.0 ), isotonic = true )
234
+ }
235
+
236
+ intercept[IllegalArgumentException ] {
237
+ // unordered predictions (antitonic)
238
+ new IsotonicRegressionModel (Array (0.0 , 1.0 ), Array (1.0 , 2.0 ), isotonic = false )
239
+ }
240
+ }
241
+ }
0 commit comments