File tree (Expand file tree / Collapse file tree): 2 files changed, +6 -3 lines changed
main/scala/org/apache/spark/rdd
test/scala/org/apache/spark/rdd
(Expand file tree / Collapse file tree): 2 files changed, +6 -3 lines changed
Original file line number | Diff line number | Diff line change
@@ -1151,8 +1151,8 @@ abstract class RDD[T: ClassTag](
1151
1151
*/
1152
1152
@ Experimental
1153
1153
def countApproxDistinct (p : Int , sp : Int ): Long = withScope {
1154
- require(p >= 4 , s " p ( $p) must be at least 4 " )
1155
- require(sp <= 32 , s " sp ( $sp) cannot be greater than 32 " )
1154
+ require(p >= 4 , s " p ( $p) must be >= 4 " )
1155
+ require(sp <= 32 , s " sp ( $sp) must be <= 32 " )
1156
1156
require(sp == 0 || p <= sp, s " p ( $p) cannot be greater than sp ( $sp) " )
1157
1157
val zeroCounter = new HyperLogLogPlus (p, sp)
1158
1158
aggregate(zeroCounter)(
@@ -1177,8 +1177,9 @@ abstract class RDD[T: ClassTag](
1177
1177
* It must be greater than 0.000017.
1178
1178
*/
1179
1179
def countApproxDistinct (relativeSD : Double = 0.05 ): Long = withScope {
1180
+ require(relativeSD > 0.000017 , s " accuracy ( $relativeSD) must be greater than 0.000017 " )
1180
1181
val p = math.ceil(2.0 * math.log(1.054 / relativeSD) / math.log(2 )).toInt
1181
- countApproxDistinct(p, 0 )
1182
+ countApproxDistinct(if (p < 4 ) 4 else p, 0 )
1182
1183
}
1183
1184
1184
1185
/**
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,8 @@ class RDDSuite extends FunSuite with SharedSparkContext {
89
89
val simpleRdd = sc.makeRDD(uniformDistro, 10 )
90
90
assert(error(simpleRdd.countApproxDistinct(8 , 0 ), size) < 0.2 )
91
91
assert(error(simpleRdd.countApproxDistinct(12 , 0 ), size) < 0.1 )
92
+ assert(error(simpleRdd.countApproxDistinct(0.02 ), size) < 0.1 )
93
+ assert(error(simpleRdd.countApproxDistinct(0.5 ), size) < 0.22 )
92
94
}
93
95
94
96
test(" SparkContext.union" ) {
You can’t perform that action at this time.
0 commit comments