Skip to content

Commit e60a34f

Browse files
SPARK-3278 changes after PR comments apache#3519. Styling and comment fixes.
1 parent d93c8f9 commit e60a34f

File tree

1 file changed

+27
-25
lines changed

1 file changed

+27
-25
lines changed

mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@ import org.apache.spark.rdd.RDD
2828
* Regression model for isotonic regression.
2929
*
3030
* @param boundaries Array of boundaries for which predictions are known.
31+
* Boundaries must be sorted in increasing order.
3132
* @param predictions Array of predictions associated with the boundaries at the same index.
33+
* Results of isotonic regression and therefore monotone.
3234
*/
3335
class IsotonicRegressionModel (
3436
boundaries: Array[Double],
@@ -75,67 +77,68 @@ class IsotonicRegressionModel (
7577
*
7678
* @param testData Feature to be labeled.
7779
* @return Predicted label.
78-
* If testData exactly matches a boundary then associated prediction is directly returned
79-
* If testData is lower or higher than all boundaries
80-
* then first or last prediction is returned respectively
81-
* If testData falls between two values in boundary then predictions is treated
82-
* as piecewise linear function and interpolated value is returned
80+
* If testData exactly matches a boundary then associated prediction is directly returned.
81+
* If testData is lower or higher than all boundaries
82+
* then first or last prediction is returned respectively.
83+
* If testData falls between two values in the boundaries array then predictions are treated
84+
* as a piecewise linear function and the interpolated value is returned.
8385
*/
8486
def predict(testData: Double): Double = {
8587

8688
def linearInterpolation(x1: Double, y1: Double, x2: Double, y2: Double, x: Double): Double = {
8789
y1 + (y2 - y1) * (x - x1) / (x2 - x1)
8890
}
8991

90-
val insertIndex = binarySearch(boundaries, testData)
91-
92-
val normalisedInsertIndex = -insertIndex - 1
92+
val foundIndex = binarySearch(boundaries, testData)
93+
val insertIndex = -foundIndex - 1
9394

9495
// Find if the test value was lower than all values,
95-
// higher than all values, inbetween two values or exact match.
96-
if (insertIndex == -1) {
96+
// higher than all values, in between two values or exact match.
97+
if (insertIndex == 0) {
9798
predictions.head
98-
} else if (normalisedInsertIndex == boundaries.length){
99+
} else if (insertIndex == boundaries.length){
99100
predictions.last
100-
} else if (insertIndex < 0) {
101+
} else if (foundIndex < 0) {
101102
linearInterpolation(
102-
boundaries(normalisedInsertIndex - 1),
103-
predictions(normalisedInsertIndex - 1),
104-
boundaries(normalisedInsertIndex),
105-
predictions(normalisedInsertIndex),
103+
boundaries(insertIndex - 1),
104+
predictions(insertIndex - 1),
105+
boundaries(insertIndex),
106+
predictions(insertIndex),
106107
testData)
107108
} else {
108-
predictions(insertIndex)
109+
predictions(foundIndex)
109110
}
110111
}
111112
}
112113

113114
/**
114115
* Isotonic regression.
115116
* Currently implemented using parallelized pool adjacent violators algorithm.
116-
* Currently only univariate (single feature) algorithm supported.
117+
* Only the univariate (single feature) algorithm is supported.
117118
*
118119
* Sequential PAV implementation based on:
119120
* Tibshirani, Ryan J., Holger Hoefling, and Robert Tibshirani.
120121
* "Nearly-isotonic regression." Technometrics 53.1 (2011): 54-61.
122+
* Available from http://www.stat.cmu.edu/~ryantibs/papers/neariso.pdf
121123
*
122-
* Sequential PAV parallelized as per:
124+
* Sequential PAV parallelization based on:
123125
* Kearsley, Anthony J., Richard A. Tapia, and Michael W. Trosset.
124126
* "An approach to parallelizing isotonic regression."
125127
* Applied Mathematics and Parallel Computing. Physica-Verlag HD, 1996. 141-147.
128+
* Available from http://softlib.rice.edu/pub/CRPC-TRs/reports/CRPC-TR96640.pdf
126129
*/
127130
class IsotonicRegression private (private var isotonic: Boolean) extends Serializable {
128131

129132
/**
130-
* Constructs IsotonicRegression instance with default parameter isotonic = true
131-
* @return New instance of IsotonicRegression
133+
* Constructs IsotonicRegression instance with default parameter isotonic = true.
134+
* @return New instance of IsotonicRegression.
132135
*/
133136
def this() = this(true)
134137

135138
/**
136-
* Sets the isotonic parameter
139+
* Sets the isotonic parameter.
137140
* @param isotonic Isotonic (increasing) or antitonic (decreasing) sequence.
138-
* @return The instance of IsotonicRegression
141+
* @return This instance of IsotonicRegression.
139142
*/
140143
def setIsotonic(isotonic: Boolean): this.type = {
141144
this.isotonic = isotonic
@@ -148,7 +151,6 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
148151
* @param input RDD of tuples (label, feature, weight) where label is dependent variable
149152
* for which we calculate isotonic regression, feature is independent variable
150153
* and weight represents number of measures with default 1.
151-
*
152154
* @return Isotonic regression model.
153155
*/
154156
def run(input: RDD[(Double, Double, Double)]): IsotonicRegressionModel = {
@@ -186,7 +188,7 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
186188
/**
187189
* Performs a pool adjacent violators algorithm (PAV).
188190
* Uses approach with single processing of data where violators
189-
* in previously processed data created by pooling are fixed immediatelly.
191+
* in previously processed data created by pooling are fixed immediately.
190192
* Uses optimization of discovering monotonicity violating sequences (blocks).
191193
*
192194
* @param input Input data of tuples (label, feature, weight).

0 commit comments

Comments
 (0)