 package org.apache.spark.mllib.regression
 
-import org.apache.spark.mllib.linalg.Vector
+import org.apache.spark.mllib.linalg.{Vectors, Vector}
+import org.apache.spark.mllib.regression.MonotonicityConstraint.Enum.MonotonicityConstraint
 
 import org.apache.spark.rdd.RDD
 
-sealed trait MonotonicityConstraint {
-  def holds(current: LabeledPoint, next: LabeledPoint): Boolean
-}
+object MonotonicityConstraint {
 
-case object Isotonic extends MonotonicityConstraint {
-  override def holds(current: LabeledPoint, next: LabeledPoint): Boolean = {
-    current.label <= next.label
-  }
-}
-case object Antitonic extends MonotonicityConstraint {
-  override def holds(current: LabeledPoint, next: LabeledPoint): Boolean = {
-    current.label >= next.label
+  object Enum {
+
+    sealed trait MonotonicityConstraint {
+      private[regression] def holds(current: WeightedLabeledPoint, next: WeightedLabeledPoint): Boolean
+    }
+
+    case object Isotonic extends MonotonicityConstraint {
+      override def holds(current: WeightedLabeledPoint, next: WeightedLabeledPoint): Boolean = {
+        current.label <= next.label
+      }
+    }
+
+    case object Antitonic extends MonotonicityConstraint {
+      override def holds(current: WeightedLabeledPoint, next: WeightedLabeledPoint): Boolean = {
+        current.label >= next.label
+      }
+    }
   }
+
+  val Isotonic = Enum.Isotonic
+  val Antitonic = Enum.Antitonic
 }
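Note: the constraint now lives in the nested `MonotonicityConstraint.Enum` object, with `val Isotonic` / `val Antitonic` aliases so call sites can still write `MonotonicityConstraint.Isotonic`. A minimal sketch of how a caller might refer to the constraints after this change; the `describe` helper is purely illustrative and not part of the patch:

```scala
import org.apache.spark.mllib.regression.MonotonicityConstraint.Enum.MonotonicityConstraint
import org.apache.spark.mllib.regression.MonotonicityConstraint.{Antitonic, Isotonic}

// The aliases are stable identifiers for the nested Enum case objects,
// so they can be used directly in pattern matches.
def describe(constraint: MonotonicityConstraint): String = constraint match {
  case Isotonic  => "labels must be non-decreasing"
  case Antitonic => "labels must be non-increasing"
}
```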
 /**
@@ -41,9 +52,10 @@ case object Antitonic extends MonotonicityConstraint {
  * @param predictions Weights computed for every feature.
  */
 class IsotonicRegressionModel(
-    val predictions: Seq[LabeledPoint],
+    val predictions: Seq[WeightedLabeledPoint],
     val monotonicityConstraint: MonotonicityConstraint)
   extends RegressionModel {
+
   override def predict(testData: RDD[Vector]): RDD[Double] =
     testData.map(predict)
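The RDD-level `predict` maps the single-vector `predict` (inherited from `RegressionModel`, not shown in this hunk) over the input. A hedged usage sketch against only the API visible here; `sc` and `model` are assumed values, not part of the diff:

```scala
// Assumes an existing SparkContext `sc` and a model produced by
// IsotonicRegression.train or PoolAdjacentViolators.run; imports as in this file.
val testFeatures: RDD[Vector] = sc.parallelize(Seq(Vectors.dense(0.5), Vectors.dense(2.5)))
val predicted: RDD[Double] = model.predict(testFeatures)
```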
@@ -60,7 +72,7 @@ trait IsotonicRegressionAlgorithm
   extends Serializable {
 
   protected def createModel(
-      weights: Seq[LabeledPoint],
+      weights: Seq[WeightedLabeledPoint],
       monotonicityConstraint: MonotonicityConstraint): IsotonicRegressionModel
 
   /**
@@ -70,47 +82,47 @@ trait IsotonicRegressionAlgorithm
    * @return model
    */
   def run(
-      input: RDD[LabeledPoint],
+      input: RDD[WeightedLabeledPoint],
       monotonicityConstraint: MonotonicityConstraint): IsotonicRegressionModel
 
   /**
    * Run algorithm to obtain isotonic regression model
    * @param input data
-   * @param initialWeights weights
    * @param monotonicityConstraint asc or desc
+   * @param weights weights
    * @return
    */
   def run(
-      input: RDD[LabeledPoint],
-      initialWeights: Vector,
-      monotonicityConstraint: MonotonicityConstraint): IsotonicRegressionModel
+      input: RDD[WeightedLabeledPoint],
+      monotonicityConstraint: MonotonicityConstraint,
+      weights: Vector): IsotonicRegressionModel
 }
 
 class PoolAdjacentViolators extends IsotonicRegressionAlgorithm {
 
   override def run(
-      input: RDD[LabeledPoint],
+      input: RDD[WeightedLabeledPoint],
       monotonicityConstraint: MonotonicityConstraint): IsotonicRegressionModel = {
     createModel(
-      parallelPoolAdjacentViolators(input, monotonicityConstraint),
+      parallelPoolAdjacentViolators(input, monotonicityConstraint, Vectors.dense(Array(0d))),
       monotonicityConstraint)
   }
 
   override def run(
-      input: RDD[LabeledPoint],
-      initialWeights: Vector,
-      monotonicityConstraint: MonotonicityConstraint): IsotonicRegressionModel = {
-    ???
+      input: RDD[WeightedLabeledPoint],
+      monotonicityConstraint: MonotonicityConstraint,
+      weights: Vector): IsotonicRegressionModel = {
+    createModel(
+      parallelPoolAdjacentViolators(input, monotonicityConstraint, weights),
+      monotonicityConstraint)
   }
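With the `???` placeholder gone, both `run` overloads share one code path; the overload without a weights vector simply passes the `Vectors.dense(Array(0d))` placeholder itself. A small hedged sketch of that relationship, where `input` is an assumed `RDD[WeightedLabeledPoint]`:

```scala
val pav = new PoolAdjacentViolators()

// After this change the two calls below take the same path through
// parallelPoolAdjacentViolators; the first supplies the placeholder internally.
val modelDefault  = pav.run(input, MonotonicityConstraint.Isotonic)
val modelExplicit = pav.run(input, MonotonicityConstraint.Isotonic, Vectors.dense(Array(0d)))
```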
 
   override protected def createModel(
-      weights: Seq[LabeledPoint],
+      predictions: Seq[WeightedLabeledPoint],
       monotonicityConstraint: MonotonicityConstraint): IsotonicRegressionModel = {
-    new IsotonicRegressionModel(weights, monotonicityConstraint)
+    new IsotonicRegressionModel(predictions, monotonicityConstraint)
   }
 
-
-
   /**
    * Performs a pool adjacent violators algorithm (PAVA)
    * Uses approach with single processing of data where violators in previously processed
@@ -123,18 +135,18 @@ class PoolAdjacentViolators extends IsotonicRegressionAlgorithm {
    * @return result
    */
   private def poolAdjacentViolators(
-      in: Array[LabeledPoint],
-      monotonicityConstraint: MonotonicityConstraint): Array[LabeledPoint] = {
+      in: Array[WeightedLabeledPoint],
+      monotonicityConstraint: MonotonicityConstraint): Array[WeightedLabeledPoint] = {
 
     // Pools sub array within given bounds assigning weighted average value to all elements
-    def pool(in: Array[LabeledPoint], start: Int, end: Int): Unit = {
+    def pool(in: Array[WeightedLabeledPoint], start: Int, end: Int): Unit = {
       val poolSubArray = in.slice(start, end + 1)
 
-      val weightedSum = poolSubArray.map(_.label).sum
-      val weight = poolSubArray.length
+      val weightedSum = poolSubArray.map(lp => lp.label * lp.weight).sum
+      val weight = poolSubArray.map(_.weight).sum
 
       for (i <- start to end) {
-        in(i) = LabeledPoint(weightedSum / weight, in(i).features)
+        in(i) = WeightedLabeledPoint(weightedSum / weight, in(i).features, in(i).weight)
       }
     }
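`pool` now computes a weight-aware average, `sum(label * weight) / sum(weight)`, instead of a plain mean over the pooled slice. A standalone arithmetic sketch of the difference on illustrative `(label, weight)` pairs:

```scala
// A pooled segment whose labels violate the isotonic constraint.
val segment = Seq((3.0, 1.0), (1.0, 3.0)) // (label, weight)

// Previous behaviour: unweighted mean = (3.0 + 1.0) / 2 = 2.0
val unweighted = segment.map(_._1).sum / segment.length

// New behaviour: weighted mean = (3.0 * 1.0 + 1.0 * 3.0) / (1.0 + 3.0) = 1.5;
// the heavier point pulls the pooled value towards its own label.
val weighted = segment.map { case (label, weight) => label * weight }.sum /
  segment.map(_._2).sum
```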
@@ -175,8 +187,9 @@ class PoolAdjacentViolators extends IsotonicRegressionAlgorithm {
    * @return result
    */
   private def parallelPoolAdjacentViolators(
-      testData: RDD[LabeledPoint],
-      monotonicityConstraint: MonotonicityConstraint): Seq[LabeledPoint] = {
+      testData: RDD[WeightedLabeledPoint],
+      monotonicityConstraint: MonotonicityConstraint,
+      weights: Vector): Seq[WeightedLabeledPoint] = {
 
     poolAdjacentViolators(
       testData
@@ -200,14 +213,14 @@ object IsotonicRegression {
    *
    * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data
    *              matrix A as well as the corresponding right hand side label y
-   * @param initialWeights Initial set of weights to be used. Array should be equal in size to
+   * @param weights Initial set of weights to be used. Array should be equal in size to
    *                       the number of features in the data.
    */
   def train(
-      input: RDD[LabeledPoint],
-      initialWeights: Vector,
-      monotonicityConstraint: MonotonicityConstraint): IsotonicRegressionModel = {
-    new PoolAdjacentViolators().run(input, initialWeights, monotonicityConstraint)
+      input: RDD[WeightedLabeledPoint],
+      monotonicityConstraint: MonotonicityConstraint,
+      weights: Vector): IsotonicRegressionModel = {
+    new PoolAdjacentViolators().run(input, monotonicityConstraint, weights)
   }
 
   /**
@@ -219,7 +232,7 @@ object IsotonicRegression {
    *              matrix A as well as the corresponding right hand side label y
    */
   def train(
-      input: RDD[LabeledPoint],
+      input: RDD[WeightedLabeledPoint],
       monotonicityConstraint: MonotonicityConstraint): IsotonicRegressionModel = {
     new PoolAdjacentViolators().run(input, monotonicityConstraint)
   }
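For context, a hedged end-to-end sketch of the public entry point after this change. It assumes a live `SparkContext` named `sc` and the `WeightedLabeledPoint(label, features, weight)` shape used elsewhere in this diff; the data values are purely illustrative:

```scala
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.{IsotonicRegression, MonotonicityConstraint, WeightedLabeledPoint}

// Weighted, single-feature training data ordered by feature value.
val input = sc.parallelize(Seq(
  WeightedLabeledPoint(1.0, Vectors.dense(1.0), 1.0),
  WeightedLabeledPoint(3.0, Vectors.dense(2.0), 1.0),
  WeightedLabeledPoint(2.0, Vectors.dense(3.0), 2.0)))

// Train with the non-decreasing constraint using the overload defined above.
val model = IsotonicRegression.train(input, MonotonicityConstraint.Isotonic)

// Predict on the training features; predictions come from the pooled labels.
val fitted = model.predict(input.map(_.features))
```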