@@ -28,7 +28,9 @@ import org.apache.spark.rdd.RDD
28
28
* Regression model for isotonic regression.
29
29
*
30
30
* @param boundaries Array of boundaries for which predictions are known.
31
+ * Boundaries must be sorted in increasing order.
31
32
* @param predictions Array of predictions associated to the boundaries at the same index.
33
+ * Result of isotonic regression, and therefore monotone.
32
34
*/
33
35
class IsotonicRegressionModel (
34
36
boundaries : Array [Double ],
@@ -75,67 +77,68 @@ class IsotonicRegressionModel (
75
77
*
76
78
* @param testData Feature to be labeled.
77
79
* @return Predicted label.
78
- * If testData exactly matches a boundary then associated prediction is directly returned
79
- * If testData is lower or higher than all boundaries
80
- * then first or last prediction is returned respectively
81
- * If testData falls between two values in boundary then predictions is treated
82
- * as piecewise linear function and interpolated value is returned
80
+ * If testData exactly matches a boundary then associated prediction is directly returned.
81
+ * If testData is lower or higher than all boundaries
82
+ * then first or last prediction is returned respectively.
83
+ * If testData falls between two values in boundary array then predictions are treated
84
+ * as piecewise linear function and interpolated value is returned.
83
85
*/
84
86
def predict (testData : Double ): Double = {
85
87
86
88
def linearInterpolation (x1 : Double , y1 : Double , x2 : Double , y2 : Double , x : Double ): Double = {
87
89
y1 + (y2 - y1) * (x - x1) / (x2 - x1)
88
90
}
89
91
90
- val insertIndex = binarySearch(boundaries, testData)
91
-
92
- val normalisedInsertIndex = - insertIndex - 1
92
+ val foundIndex = binarySearch(boundaries, testData)
93
+ val insertIndex = - foundIndex - 1
93
94
94
95
// Find if the index was lower than all values,
95
- // higher than all values, inbetween two values or exact match.
96
- if (insertIndex == - 1 ) {
96
+ // higher than all values, in between two values or exact match.
97
+ if (insertIndex == 0 ) {
97
98
predictions.head
98
- } else if (normalisedInsertIndex == boundaries.length){
99
+ } else if (insertIndex == boundaries.length){
99
100
predictions.last
100
- } else if (insertIndex < 0 ) {
101
+ } else if (foundIndex < 0 ) {
101
102
linearInterpolation(
102
- boundaries(normalisedInsertIndex - 1 ),
103
- predictions(normalisedInsertIndex - 1 ),
104
- boundaries(normalisedInsertIndex ),
105
- predictions(normalisedInsertIndex ),
103
+ boundaries(insertIndex - 1 ),
104
+ predictions(insertIndex - 1 ),
105
+ boundaries(insertIndex ),
106
+ predictions(insertIndex ),
106
107
testData)
107
108
} else {
108
- predictions(insertIndex )
109
+ predictions(foundIndex )
109
110
}
110
111
}
111
112
}
112
113
113
114
/**
114
115
* Isotonic regression.
115
116
* Currently implemented using parallelized pool adjacent violators algorithm.
116
- * Currently only univariate (single feature) algorithm supported.
117
+ * Only the univariate (single feature) algorithm is supported.
117
118
*
118
119
* Sequential PAV implementation based on:
119
120
* Tibshirani, Ryan J., Holger Hoefling, and Robert Tibshirani.
120
121
* "Nearly-isotonic regression." Technometrics 53.1 (2011): 54-61.
122
+ * Available from http://www.stat.cmu.edu/~ryantibs/papers/neariso.pdf
121
123
*
122
- * Sequential PAV parallelized as per :
124
+ * Sequential PAV parallelization based on :
123
125
* Kearsley, Anthony J., Richard A. Tapia, and Michael W. Trosset.
124
126
* "An approach to parallelizing isotonic regression."
125
127
* Applied Mathematics and Parallel Computing. Physica-Verlag HD, 1996. 141-147.
128
+ * Available from http://softlib.rice.edu/pub/CRPC-TRs/reports/CRPC-TR96640.pdf
126
129
*/
127
130
class IsotonicRegression private (private var isotonic : Boolean ) extends Serializable {
128
131
129
132
/**
130
- * Constructs IsotonicRegression instance with default parameter isotonic = true
131
- * @return New instance of IsotonicRegression
133
+ * Constructs IsotonicRegression instance with default parameter isotonic = true.
134
+ * @return New instance of IsotonicRegression.
132
135
*/
133
136
def this () = this (true )
134
137
135
138
/**
136
- * Sets the isotonic parameter
139
+ * Sets the isotonic parameter.
137
140
* @param isotonic Isotonic (increasing) or antitonic (decreasing) sequence.
138
- * @return The instance of IsotonicRegression
141
+ * @return This instance of IsotonicRegression.
139
142
*/
140
143
def setIsotonic (isotonic : Boolean ): this .type = {
141
144
this .isotonic = isotonic
@@ -148,7 +151,6 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
148
151
* @param input RDD of tuples (label, feature, weight) where label is dependent variable
149
152
* for which we calculate isotonic regression, feature is independent variable
150
153
* and weight represents number of measures with default 1.
151
- *
152
154
* @return Isotonic regression model.
153
155
*/
154
156
def run (input : RDD [(Double , Double , Double )]): IsotonicRegressionModel = {
@@ -186,7 +188,7 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
186
188
/**
187
189
* Performs a pool adjacent violators algorithm (PAV).
188
190
* Uses approach with single processing of data where violators
189
- * in previously processed data created by pooling are fixed immediatelly .
191
+ * in previously processed data created by pooling are fixed immediately .
190
192
* Uses optimization of discovering monotonicity violating sequences (blocks).
191
193
*
192
194
* @param input Input data of tuples (label, feature, weight).
0 commit comments