Skip to content

Commit 122d378

Browse files
author
Vinod K C
committed
Fixed validation of relativeSD in countApproxDistinct
1 parent 32cdc81 commit 122d378

File tree

2 files changed: 6 additions and 3 deletions.

core/src/main/scala/org/apache/spark/rdd/RDD.scala

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1151,8 +1151,8 @@ abstract class RDD[T: ClassTag](
11511151
*/
11521152
@Experimental
11531153
def countApproxDistinct(p: Int, sp: Int): Long = withScope {
1154-
require(p >= 4, s"p ($p) must be at least 4")
1155-
require(sp <= 32, s"sp ($sp) cannot be greater than 32")
1154+
require(p >= 4, s"p ($p) must be >= 4")
1155+
require(sp <= 32, s"sp ($sp) must be <= 32")
11561156
require(sp == 0 || p <= sp, s"p ($p) cannot be greater than sp ($sp)")
11571157
val zeroCounter = new HyperLogLogPlus(p, sp)
11581158
aggregate(zeroCounter)(
@@ -1177,8 +1177,9 @@ abstract class RDD[T: ClassTag](
11771177
* It must be greater than 0.000017.
11781178
*/
11791179
def countApproxDistinct(relativeSD: Double = 0.05): Long = withScope {
1180+
require(relativeSD > 0.000017, s"accuracy ($relativeSD) must be greater than 0.000017")
11801181
val p = math.ceil(2.0 * math.log(1.054 / relativeSD) / math.log(2)).toInt
1181-
countApproxDistinct(p, 0)
1182+
countApproxDistinct(if (p < 4) 4 else p, 0)
11821183
}
11831184

11841185
/**

core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,8 @@ class RDDSuite extends FunSuite with SharedSparkContext {
8989
val simpleRdd = sc.makeRDD(uniformDistro, 10)
9090
assert(error(simpleRdd.countApproxDistinct(8, 0), size) < 0.2)
9191
assert(error(simpleRdd.countApproxDistinct(12, 0), size) < 0.1)
92+
assert(error(simpleRdd.countApproxDistinct(0.02), size) < 0.1)
93+
assert(error(simpleRdd.countApproxDistinct(0.5), size) < 0.22)
9294
}
9395

9496
test("SparkContext.union") {

0 commit comments

Comments
 (0)