 package org.apache.spark.mllib.regression
 
 import org.apache.spark.mllib.linalg.Vector
-import org.apache.spark.mllib.regression.MonotonicityConstraint.MonotonicityConstraint._
 import org.apache.spark.rdd.RDD
 
-/**
- * Monotonicity constrains for monotone regression
- * Isotonic (increasing)
- * Antitonic (decreasing)
- */
-object MonotonicityConstraint {
-
-  object MonotonicityConstraint {
-
-    sealed trait MonotonicityConstraint {
-      private[regression] def holds(
-          current: WeightedLabeledPoint,
-          next: WeightedLabeledPoint): Boolean
-    }
-
-    /**
-     * Isotonic monotonicity constraint. Increasing sequence
-     */
-    case object Isotonic extends MonotonicityConstraint {
-      override def holds(current: WeightedLabeledPoint, next: WeightedLabeledPoint): Boolean = {
-        current.label <= next.label
-      }
-    }
-
-    /**
-     * Antitonic monotonicity constrain. Decreasing sequence
-     */
-    case object Antitonic extends MonotonicityConstraint {
-      override def holds(current: WeightedLabeledPoint, next: WeightedLabeledPoint): Boolean = {
-        current.label >= next.label
-      }
-    }
-  }
-
-  val Isotonic = MonotonicityConstraint.Isotonic
-  val Antitonic = MonotonicityConstraint.Antitonic
-}
-
 /**
  * Regression model for Isotonic regression
  *
  * @param predictions Weights computed for every feature.
- * @param monotonicityConstraint specifies if the sequence is increasing or decreasing
+ * @param isotonic isotonic (increasing) or antitonic (decreasing) sequence
  */
 class IsotonicRegressionModel(
     val predictions: Seq[(Double, Double, Double)],
-    val monotonicityConstraint: MonotonicityConstraint)
+    val isotonic: Boolean)
   extends RegressionModel {
 
   override def predict(testData: RDD[Vector]): RDD[Double] =
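For context (not part of the patch itself), the effect of this change is that a model is now constructed with a plain boolean flag instead of a `MonotonicityConstraint` value. A minimal before/after sketch of the call site, assuming some precomputed (label, feature, weight) prediction triples (the sample values are illustrative):

```scala
// Hypothetical precomputed predictions as (label, feature, weight) triples.
val predictions = Seq((1.0, 1.0, 1.0), (2.0, 2.0, 1.0), (2.5, 3.0, 1.0))

// Before this patch: new IsotonicRegressionModel(predictions, Isotonic)
// After this patch: the direction is a boolean
// (true = isotonic/increasing, false = antitonic/decreasing).
val model = new IsotonicRegressionModel(predictions, isotonic = true)
```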
@@ -91,23 +52,23 @@ trait IsotonicRegressionAlgorithm
    *
    * @param predictions labels estimated using isotonic regression algorithm.
    *                    Used for predictions on new data points.
-   * @param monotonicityConstraint isotonic or antitonic
+   * @param isotonic isotonic (increasing) or antitonic (decreasing) sequence
    * @return isotonic regression model
    */
   protected def createModel(
       predictions: Seq[(Double, Double, Double)],
-      monotonicityConstraint: MonotonicityConstraint): IsotonicRegressionModel
+      isotonic: Boolean): IsotonicRegressionModel
 
   /**
    * Run algorithm to obtain isotonic regression model
    *
    * @param input data
-   * @param monotonicityConstraint ascending or descenting
+   * @param isotonic isotonic (increasing) or antitonic (decreasing) sequence
    * @return isotonic regression model
    */
   def run(
       input: RDD[(Double, Double, Double)],
-      monotonicityConstraint: MonotonicityConstraint): IsotonicRegressionModel
+      isotonic: Boolean): IsotonicRegressionModel
 }
 
 /**
@@ -118,16 +79,16 @@ class PoolAdjacentViolators private[mllib]
 
   override def run(
       input: RDD[(Double, Double, Double)],
-      monotonicityConstraint: MonotonicityConstraint): IsotonicRegressionModel = {
+      isotonic: Boolean): IsotonicRegressionModel = {
     createModel(
-      parallelPoolAdjacentViolators(input, monotonicityConstraint),
-      monotonicityConstraint)
+      parallelPoolAdjacentViolators(input, isotonic),
+      isotonic)
   }
 
   override protected def createModel(
       predictions: Seq[(Double, Double, Double)],
-      monotonicityConstraint: MonotonicityConstraint): IsotonicRegressionModel = {
-    new IsotonicRegressionModel(predictions, monotonicityConstraint)
+      isotonic: Boolean): IsotonicRegressionModel = {
+    new IsotonicRegressionModel(predictions, isotonic)
   }
 
   /**
@@ -138,32 +99,38 @@ class PoolAdjacentViolators private[mllib]
    * Method in situ mutates input array
    *
    * @param in input data
-   * @param monotonicityConstraint asc or desc
+   * @param isotonic isotonic (increasing) or antitonic (decreasing) sequence
    * @return result
    */
   private def poolAdjacentViolators(
-      in: Array[WeightedLabeledPoint],
-      monotonicityConstraint: MonotonicityConstraint): Array[WeightedLabeledPoint] = {
+      in: Array[(Double, Double, Double)],
+      isotonic: Boolean): Array[(Double, Double, Double)] = {
 
     // Pools sub array within given bounds assigning weighted average value to all elements
-    def pool(in: Array[WeightedLabeledPoint], start: Int, end: Int): Unit = {
+    def pool(in: Array[(Double, Double, Double)], start: Int, end: Int): Unit = {
       val poolSubArray = in.slice(start, end + 1)
 
-      val weightedSum = poolSubArray.map(lp => lp.label * lp.weight).sum
-      val weight = poolSubArray.map(_.weight).sum
+      val weightedSum = poolSubArray.map(lp => lp._1 * lp._3).sum
+      val weight = poolSubArray.map(_._3).sum
 
       for (i <- start to end) {
-        in(i) = WeightedLabeledPoint(weightedSum / weight, in(i).features, in(i).weight)
+        in(i) = (weightedSum / weight, in(i)._2, in(i)._3)
       }
     }
 
     var i = 0
 
+    val monotonicityConstrainter: (Double, Double) => Boolean = (x, y) => if (isotonic) {
+      x <= y
+    } else {
+      x >= y
+    }
+
     while (i < in.length) {
       var j = i
 
       // Find monotonicity violating sequence, if any
-      while (j < in.length - 1 && !monotonicityConstraint.holds(in(j), in(j + 1))) {
+      while (j < in.length - 1 && !monotonicityConstrainter(in(j)._1, in(j + 1)._1)) {
         j = j + 1
       }
 
@@ -173,7 +140,7 @@ class PoolAdjacentViolators private[mllib]
       } else {
         // Otherwise pool the violating sequence
        // And check if pooling caused monotonicity violation in previously processed points
-        while (i >= 0 && !monotonicityConstraint.holds(in(i), in(i + 1))) {
+        while (i >= 0 && !monotonicityConstrainter(in(i)._1, in(i + 1)._1)) {
           pool(in, i, j)
           i = i - 1
         }
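As an aside on the algorithm in this hunk: pool adjacent violators repeatedly replaces a run of points that violates the required ordering with its weighted-average label, then backs up in case the pooling created a new violation with earlier points. A simplified, self-contained sketch of that idea on a plain in-memory array (the name `pavaSketch` and the exact structure are illustrative, not the patch's code):

```scala
// Simplified sequential PAVA over (label, feature, weight) triples,
// assuming the input is already sorted by the feature value.
def pavaSketch(
    points: Array[(Double, Double, Double)],
    isotonic: Boolean): Array[(Double, Double, Double)] = {
  val holds: (Double, Double) => Boolean = (x, y) => if (isotonic) x <= y else x >= y
  val in = points.clone()
  var i = 0
  while (i < in.length) {
    // Find a run starting at i that violates the monotonicity constraint.
    var j = i
    while (j < in.length - 1 && !holds(in(j)._1, in(j + 1)._1)) {
      j = j + 1
    }
    if (i == j) {
      i = i + 1
    } else {
      // Pool the violating run into its weighted-average label and back up
      // while the pooled value still conflicts with earlier points.
      while (i >= 0 && !holds(in(i)._1, in(i + 1)._1)) {
        val slice = in.slice(i, j + 1)
        val pooled = slice.map(p => p._1 * p._3).sum / slice.map(_._3).sum
        for (k <- i to j) {
          in(k) = (pooled, in(k)._2, in(k)._3)
        }
        i = i - 1
      }
      i = j
    }
  }
  in
}
```

For example, `pavaSketch(Array((1.0, 1.0, 1.0), (3.0, 2.0, 1.0), (2.0, 3.0, 1.0)), isotonic = true)` pools the violating pair (3.0, 2.0) into 2.5, yielding the non-decreasing labels 1.0, 2.5, 2.5.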
@@ -190,19 +157,19 @@ class PoolAdjacentViolators private[mllib]
    * Calls Pool adjacent violators on each partition and then again on the result
    *
    * @param testData input
-   * @param monotonicityConstraint asc or desc
+   * @param isotonic isotonic (increasing) or antitonic (decreasing) sequence
    * @return result
    */
   private def parallelPoolAdjacentViolators(
       testData: RDD[(Double, Double, Double)],
-      monotonicityConstraint: MonotonicityConstraint): Seq[(Double, Double, Double)] = {
+      isotonic: Boolean): Seq[(Double, Double, Double)] = {
 
     poolAdjacentViolators(
       testData
         .sortBy(_._2)
         .cache()
-        .mapPartitions(it => poolAdjacentViolators(it.toArray, monotonicityConstraint).toIterator)
-        .collect(), monotonicityConstraint)
+        .mapPartitions(it => poolAdjacentViolators(it.toArray, isotonic).toIterator)
+        .collect(), isotonic)
   }
 }
 
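To make the partition-level strategy concrete: the data is sorted by feature, PAVA runs independently on each partition, and the concatenated per-partition results are passed through PAVA once more on the driver. A toy, non-RDD illustration of the same two-pass idea, reusing the `pavaSketch` helper sketched above (again an illustration, not the patch's code):

```scala
// Toy stand-in for the two-pass strategy: each "partition" holds a contiguous,
// feature-sorted slice of the data.
val partitions: Seq[Array[(Double, Double, Double)]] = Seq(
  Array((1.0, 1.0, 1.0), (3.0, 2.0, 1.0)),
  Array((2.0, 3.0, 1.0), (4.0, 4.0, 1.0)))

// First pass: PAVA within each partition.
val perPartition = partitions.map(p => pavaSketch(p, isotonic = true))

// Second pass: PAVA once more over the concatenated per-partition results.
val merged = pavaSketch(perPartition.flatten.toArray, isotonic = true)
```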
@@ -221,11 +188,11 @@ object IsotonicRegression {
    * Each point describes a row of the data
    * matrix A as well as the corresponding right hand side label y
    * and weight as number of measurements
-   * @param monotonicityConstraint Isotonic (increasing) or Antitonic (decreasing) sequence
+   * @param isotonic isotonic (increasing) or antitonic (decreasing) sequence
    */
   def train(
       input: RDD[(Double, Double, Double)],
-      monotonicityConstraint: MonotonicityConstraint = Isotonic): IsotonicRegressionModel = {
-    new PoolAdjacentViolators().run(input, monotonicityConstraint)
+      isotonic: Boolean = true): IsotonicRegressionModel = {
+    new PoolAdjacentViolators().run(input, isotonic)
   }
 }
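Finally, a hedged usage sketch of the entry point as it stands in this patch, assuming a live SparkContext named `sc` (the variable name and data values are illustrative):

```scala
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.IsotonicRegression

// Training data as (label, feature, weight) triples.
val input = sc.parallelize(Seq(
  (1.0, 1.0, 1.0),
  (3.0, 2.0, 1.0),
  (2.0, 3.0, 1.0)))

// Fit an increasing sequence; pass isotonic = false for a decreasing (antitonic) fit.
val model = IsotonicRegression.train(input, isotonic = true)

// Predict labels for new feature values.
val predicted = model.predict(sc.parallelize(Seq(Vectors.dense(2.5))))
```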