@@ -120,23 +120,23 @@ class MLUtilsSuite extends FunSuite with LocalSparkContext {
120
120
for (seed <- 1 to 5 ) {
121
121
val foldedRdds = MLUtils .kFold(data, folds, seed)
122
122
assert(foldedRdds.size === folds)
123
- foldedRdds.map{ case (test, train) =>
123
+ foldedRdds.map { case (test, train) =>
124
124
val result = test.union(train).collect().sorted
125
125
val testSize = test.collect().size.toFloat
126
- assert(testSize > 0 , " Non empty test data" )
126
+ assert(testSize > 0 , " empty test data" )
127
127
val p = 1 / folds.toFloat
128
128
// Within 3 standard deviations of the mean
129
129
val range = 3 * math.sqrt(100 * p * (1 - p))
130
130
val expected = 100 * p
131
131
val lowerBound = expected - range
132
132
val upperBound = expected + range
133
133
assert(testSize > lowerBound,
134
- " Test data (" + testSize + " ) smaller than expected (" + lowerBound + " )" )
134
+ s " Test data ( $ testSize) smaller than expected ( $ lowerBound) " )
135
135
assert(testSize < upperBound,
136
- " Test data (" + testSize + " ) larger than expected (" + upperBound + " )" )
137
- assert(train.collect().size > 0 , " Non empty training data" )
136
+ s " Test data ( $ testSize) larger than expected ( $ upperBound) " )
137
+ assert(train.collect().size > 0 , " empty training data" )
138
138
assert(result === collectedData,
139
- " Each training+test set combined contains all of the data" )
139
+ " Each training+test set combined should contain all of the data. " )
140
140
}
141
141
// K fold cross validation should only have each element in the test set exactly once
142
142
assert(foldedRdds.map(_._1).reduce((x,y) => x.union(y)).collect().sorted ===
0 commit comments