@@ -88,8 +88,11 @@ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext {
88
88
(x : Int ) => if (x % 10 < (10 * fractionPositive).toInt) " 1" else " 0"
89
89
}
90
90
91
- def checkSize (exact : Boolean , withReplacement : Boolean ,
92
- expected : Long , actual : Long , p : Double ): Boolean = {
91
+ def checkSize (exact : Boolean ,
92
+ withReplacement : Boolean ,
93
+ expected : Long ,
94
+ actual : Long ,
95
+ p : Double ): Boolean = {
93
96
if (exact) {
94
97
return expected == actual
95
98
}
@@ -110,8 +113,8 @@ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext {
110
113
val sample = stratifiedData.sampleByKey(false , fractions, exact, seed)
111
114
val sampleCounts = sample.countByKey()
112
115
val takeSample = sample.collect()
113
- assert(sampleCounts.forall( {case (k,v) =>
114
- checkSize(exact, false , expectedSampleSize(k), v, samplingRate)}))
116
+ assert(sampleCounts.forall {case (k,v) =>
117
+ checkSize(exact, false , expectedSampleSize(k), v, samplingRate)})
115
118
assert(takeSample.size === takeSample.toSet.size)
116
119
assert(takeSample.forall(x => 1 <= x._2 && x._2 <= n), s " elements not in [1, $n] " )
117
120
}
@@ -128,9 +131,9 @@ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext {
128
131
val sample = stratifiedData.sampleByKey(true , fractions, exact, seed)
129
132
val sampleCounts = sample.countByKey()
130
133
val takeSample = sample.collect()
131
- assert(sampleCounts.forall( {case (k,v) =>
132
- checkSize(exact, true , expectedSampleSize(k), v, samplingRate)}))
133
- val groupedByKey = takeSample.groupBy( {case (k, v) => k})
134
+ assert(sampleCounts.forall {case (k,v) =>
135
+ checkSize(exact, true , expectedSampleSize(k), v, samplingRate)})
136
+ val groupedByKey = takeSample.groupBy {case (k, v) => k}
134
137
for ((key, v) <- groupedByKey) {
135
138
if (expectedSampleSize(key) >= 100 && samplingRate >= 0.1 ) {
136
139
// sample large enough for there to be repeats with high likelihood
@@ -146,8 +149,10 @@ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext {
146
149
assert(takeSample.forall(x => 1 <= x._2 && x._2 <= n), s " elements not in [1, $n] " )
147
150
}
148
151
149
- def checkAllCombos (stratifiedData : RDD [(String , Int )], samplingRate : Double ,
150
- seed : Long , n : Long ) {
152
+ def checkAllCombos (stratifiedData : RDD [(String , Int )],
153
+ samplingRate : Double ,
154
+ seed : Long ,
155
+ n : Long ) = {
151
156
takeSampleAndValidateBernoulli(stratifiedData, true , samplingRate, seed, n)
152
157
takeSampleAndValidateBernoulli(stratifiedData, false , samplingRate, seed, n)
153
158
takeSampleAndValidatePoisson(stratifiedData, true , samplingRate, seed, n)
0 commit comments