Skip to content

Commit 7ef09b6

Browse files
authored
Merge pull request #656 from non/topic/stats
Add various statistical distributions to Gen.
2 parents 5071d6d + 6998ba4 commit 7ef09b6

File tree

2 files changed

+244
-8
lines changed

2 files changed

+244
-8
lines changed
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
package org.scalacheck
2+
3+
import Prop.{forAllNoShrink => forAll}
4+
5+
object StatsSpecification extends Properties("Stats") {

  // Every property evaluation draws `Samples` values from the
  // generator under test, so only 10 evaluations are run per property.
  override def overrideParameters(ps: Test.Parameters): Test.Parameters = {
    ps.withMinSuccessfulTests(10)
  }

  // Number of values drawn from a distribution for each evaluation.
  val Samples = 5000

  // Relative tolerance: the observed mean and standard deviation must
  // each lie within ±10% of the expected value.
  val ErrorRate = 0.1

  // Means are kept relatively small and well-behaved.
  val genMean = Gen.choose(10.0, 20.0)

  // Number of trials used by the binomial property.
  val genTrials = Gen.choose(10, 30)

  // Probability values used by the prob and binomial properties.
  val genP = Gen.choose(0.2, 0.8)

  property("prob") = forAll(genP) { chance =>
    // Map the Boolean outcome to a 0/1 indicator so it can be averaged.
    val indicator = Gen.prob(chance).map {
      case true  => 1.0
      case false => 0.0
    }
    check(indicator, mean = chance, stdDev = Math.sqrt(chance * (1.0 - chance)))
  }

  property("gaussian") = forAll(genMean, genMean) { (mu, sigma) =>
    check(Gen.gaussian(mu, sigma), mean = mu, stdDev = sigma)
  }

  property("exponential") = forAll(genMean) { mu =>
    // The generator takes a rate; its mean and stdDev are both 1/rate.
    check(Gen.exponential(1.0 / mu), mean = mu, stdDev = mu)
  }

  property("geometric") = forAll(genMean) { mu =>
    val samples = Gen.geometric(mu).map(_.toDouble)
    val successProb = 1.0 / (mu + 1.0)
    val sigma = Math.sqrt((1.0 - successProb) / (successProb * successProb))
    check(samples, mean = mu, stdDev = sigma)
  }

  property("poisson") = forAll(genMean) { rate =>
    check(Gen.poisson(rate).map(_.toDouble), mean = rate, stdDev = Math.sqrt(rate))
  }

  property("binomial") = forAll(genTrials, genP) { (trials, p) =>
    val successes = Gen.binomial(Gen.prob(p), trials).map(_.toDouble)
    val expectedMean = trials * p
    val expectedStdDev = Math.sqrt(trials * p * (1.0 - p))
    check(successes, mean = expectedMean, stdDev = expectedStdDev)
  }

  /**
   * Samples `gen` and verifies that the observed mean and standard
   * deviation are each within `ErrorRate` (relative) of the expected
   * `mean` and `stdDev`.
   */
  def check(gen: Gen[Double], mean: Double, stdDev: Double): Prop = {
    val (observedMean, observedStdDev) = computeStats(gen, Samples)
    val meanOk = (mean ± (mean * ErrorRate)).contains(observedMean)
    val stdDevOk = (stdDev ± (stdDev * ErrorRate)).contains(observedStdDev)
    meanOk && stdDevOk
  }

  /**
   * Draws `samples` values from `g` and returns the pair
   * (mean, standard deviation) of the drawn values. The standard
   * deviation divides by the sample count (no n - 1 correction).
   */
  def computeStats(g: Gen[Double], samples: Int): (Double, Double) = {
    val drawn = Gen.buildableOfN[Vector[Double], Double](samples, g).sample.get
    val count = drawn.size
    val mean = drawn.sum / count
    val variance = drawn.iterator.map(x => Math.pow(x - mean, 2)).sum / count
    (mean, Math.sqrt(variance))
  }

  /** A closed interval [min, max]. */
  case class Bounds(min: Double, max: Double) {
    /** Property that holds when `x` lies inside the interval; labelled for failure reports. */
    def contains(x: Double): Prop = {
      val inside = x >= min && x <= max
      Prop(inside) :| s"($min <= $x <= $max) was false"
    }
  }

  /** Adds the `±` operator to Double for building a [[Bounds]] around a value. */
  implicit class MakeBounds(val n: Double) extends AnyVal {
    def ±(error: Double): Bounds = {
      val lower = n - error
      val upper = n + error
      Bounds(lower, upper)
    }
  }
}

src/main/scala/org/scalacheck/Gen.scala

Lines changed: 157 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,13 @@ sealed abstract class Gen[+T] extends Serializable { self =>
3030

3131
import Gen.{R, gen}
3232

33-
/** Just an alias */
34-
private type P = Gen.Parameters
35-
3633
// This is no longer used but preserved here for binary compatibility.
3734
private[scalacheck] def sieveCopy(x: Any): Boolean = true
3835

3936
// If you implement new Gen[_] directly (instead of using
4037
// combinators), make sure to use p.initialSeed or p.useInitialSeed
4138
// in the implementation, instead of using seed directly.
42-
private[scalacheck] def doApply(p: P, seed: Seed): R[T]
39+
private[scalacheck] def doApply(p: Gen.Parameters, seed: Seed): R[T]
4340

4441
//// Public interface ////
4542

@@ -106,7 +103,7 @@ sealed abstract class Gen[+T] extends Serializable { self =>
106103
* This method is identical to [Gen.filter]. */
107104
def suchThat(f: T => Boolean): Gen[T] =
108105
new Gen[T] {
109-
def doApply(p: P, seed: Seed): Gen.R[T] =
106+
def doApply(p: Gen.Parameters, seed: Seed): Gen.R[T] =
110107
p.useInitialSeed(seed) { (p0, s0) =>
111108
val r = self.doApply(p0, s0)
112109
r.copy(r = r.retrieve.filter(f))
@@ -126,7 +123,7 @@ sealed abstract class Gen[+T] extends Serializable { self =>
126123
*/
127124
def retryUntil(p: T => Boolean, maxTries: Int): Gen[T] = {
128125
require(maxTries > 0)
129-
def loop(params: P, seed: Seed, tries: Int): R[T] =
126+
def loop(params: Gen.Parameters, seed: Seed, tries: Int): R[T] =
130127
if (tries > maxTries) throw RetryUntilException(tries) else {
131128
val r = self.doApply(params, seed)
132129
if (r.retrieve.exists(p)) r else loop(params, r.seed, tries + 1)
@@ -175,7 +172,7 @@ sealed abstract class Gen[+T] extends Serializable { self =>
175172

176173
/** Put a label on the generator to make test reports clearer */
177174
def label(l: String): Gen[T] = new Gen[T] {
178-
def doApply(p: P, seed: Seed) =
175+
def doApply(p: Gen.Parameters, seed: Seed) =
179176
p.useInitialSeed(seed) { (p0, s0) =>
180177
val r = self.doApply(p0, s0)
181178
r.copy(l = r.labels + l)
@@ -240,7 +237,7 @@ object Gen extends GenArities with GenVersionSpecific {
240237
r(None, seed).copy(l = labels)
241238
case Some(t) =>
242239
val r = f(t)
243-
r.copy(l = labels | r.labels, sd = r.seed)
240+
r.copy(l = labels | r.labels)
244241
}
245242
}
246243

@@ -947,6 +944,158 @@ object Gen extends GenArities with GenVersionSpecific {
947944

948945
//// Number Generators ////
949946

947+
/**
 * Generates uniformly-distributed Long values.
 *
 * Every possible Long value is equally likely to be produced.
 */
val long: Gen[Long] =
  gen { (_, seed) =>
    seed.long match {
      case (value, next) => r(Some(value), next)
    }
  }
958+
959+
/**
 * Generates Double values uniformly distributed over the unit
 * interval [0, 1).
 *
 * The result is one of 2^53 distinct Double values in that interval.
 */
val double: Gen[Double] =
  gen { (_, seed) =>
    seed.double match {
      case (value, next) => r(Some(value), next)
    }
  }
970+
971+
/**
 * Generates a Boolean that is true with the given probability.
 *
 *  - prob(1.0) is always true
 *  - prob(0.5) is true 50% of the time
 *  - prob(0.1) is true 10% of the time
 *  - prob(0.0) is never true
 *
 * Values of `chance` at or above 1.0 always yield true, and values at
 * or below 0.0 always yield false; neither case consumes the seed's
 * random stream through this generator.
 */
def prob(chance: Double): Gen[Boolean] = {
  if (chance >= 1.0) Gen.const(true)
  else if (chance <= 0.0) Gen.const(false)
  else
    gen { (_, seed) =>
      val (roll, next) = seed.double
      r(Some(roll < chance), next)
    }
}
986+
987+
/**
 * Generates Double values according to the given gaussian
 * distribution, specified by its mean and standard deviation.
 *
 * Gaussian distributions are also called normal distributions.
 *
 * The range of values is theoretically (-∞, ∞) but 99.7% of all
 * values will be contained within (mean ± 3 * stdDev).
 *
 * Values are produced with the polar (Marsaglia) method: points are
 * drawn uniformly from the square [-1, 1) x [-1, 1) and rejected
 * until one falls strictly inside the unit circle (excluding the
 * origin).
 *
 * @param mean   the mean of the distribution
 * @param stdDev the standard deviation of the distribution
 */
def gaussian(mean: Double, stdDev: Double): Gen[Double] = {
  // The rejection loop may retry an unbounded number of times, so the
  // annotation makes the compiler verify it runs in constant stack.
  @scala.annotation.tailrec
  def loop(s0: Seed): R[Double] = {
    val (x0, s1) = s0.double
    val (y0, s2) = s1.double
    // Rescale both draws from [0, 1) to [-1, 1).
    val x = x0 * 2.0 - 1.0
    val y = y0 * 2.0 - 1.0
    val s = x * x + y * y
    if (s >= 1.0 || s == 0.0) {
      // Outside the unit circle (or exactly at the origin, where
      // log(s) / s is undefined): reject and draw again.
      loop(s2)
    } else {
      val scale = stdDev * Math.sqrt(-2.0 * Math.log(s) / s)
      // The method yields two independent variates; only the one
      // derived from x is used (y * scale + mean is dropped).
      val res = x * scale + mean
      r(Some(res), s2)
    }
  }
  gen((_, seed) => loop(seed))
}
1013+
1014+
/**
 * Generates Double values according to the given exponential
 * distribution, specified by its rate parameter.
 *
 * The mean and standard deviation are both equal to 1/rate.
 *
 * The range of values is [0, ∞).
 *
 * @param rate the rate parameter; must be positive
 * @throws IllegalArgumentException if `rate` is not positive
 */
def exponential(rate: Double): Gen[Double] = {
  require(rate > 0.0, s"rate must be positive (got: $rate)")
  val mean = 1.0 / rate
  gen { (_, s0) =>
    val (x, s1) = s0.double
    // x lies in [0, 1), so taking log(x) directly would produce
    // an infinite result when x == 0. Using 1 - x, which lies in
    // (0, 1], follows the same distribution and keeps every result
    // finite and non-negative. log1p(-x) computes log(1 - x).
    r(Some(-Math.log1p(-x) * mean), s1)
  }
}
1030+
1031+
/**
 * Generates Int values according to the given geometric
 * distribution, specified by its mean.
 *
 * The generated value is the number of failures before the first
 * successful test, where the probability of a successful test is
 * p = 1 / (mean + 1).
 *
 * The ideal range of values is [0, ∞), although the largest value
 * that can be produced here is 2147483647 (Int.MaxValue).
 *
 * @param mean the mean of the distribution; must be positive
 * @throws IllegalArgumentException if `mean` is not positive
 */
def geometric(mean: Double): Gen[Int] = {
  require(mean > 0.0, s"mean must be positive (got: $mean)")
  val successProb = 1.0 / (mean + 1.0)
  // log(1 - p), i.e. the log of the per-trial failure probability.
  val logFailProb = Math.log1p(-successProb)
  gen { (_, seed) =>
    val (u, next) = seed.double
    val failures = Math.floor(Math.log(u) / logFailProb).toInt
    r(Some(failures), next)
  }
}
1051+
1052+
/**
 * Generates Int values according to the given Poisson distribution,
 * specified by its rate parameter.
 *
 * The mean equals the rate; the standard deviation is sqrt(rate).
 *
 * In principle any positive value is a valid rate parameter.
 * However, our method of generating values cannot handle large
 * rates, so we require rate <= 745.
 *
 * The range of values is [0, ∞).
 *
 * @param rate the rate parameter; must satisfy 0 < rate <= 745
 * @throws IllegalArgumentException if `rate` is outside that range
 */
def poisson(rate: Double): Gen[Int] = {
  require(0 < rate && rate <= 745.0, s"rate must be between 0 and 745 (got $rate)")
  val L = Math.exp(-rate)

  // Multiply uniform draws together until the running product drops
  // to L or below; the result is the number of draws made before the
  // one that crossed the threshold.
  //
  // A draw is always made before the threshold is tested. For very
  // small rates exp(-rate) rounds to exactly 1.0, and testing the
  // initial product (1.0) first would end the loop before any draw
  // and yield the invalid count -1. For every other rate the values
  // and seeds produced are the same as testing first.
  @scala.annotation.tailrec
  def loop(s0: Seed, k: Int, p: Double): R[Int] = {
    val (x, s1) = s0.double
    val product = p * x
    if (product <= L) r(Some(k), s1)
    else loop(s1, k + 1, product)
  }

  gen((_, s) => loop(s, 0, 1.0))
}
1075+
1076+
/**
 * Generates Int values according to a binomial distribution,
 * specified by a generator for the outcome of a single trial and
 * the number of trials to conduct.
 *
 * This distribution counts the number of trials which were
 * successful (i.e. for which `test` produced true). To run trials
 * with a fixed success probability p, pass `Gen.prob(p)` as `test`.
 *
 * The range of values is [0, trials].
 *
 * @param test   generator producing the outcome of one trial
 * @param trials the number of trials to conduct; when this is not
 *               positive no trials are run and the result is 0
 */
def binomial(test: Gen[Boolean], trials: Int): Gen[Int] = {
  // i counts the trials run so far, n the successes among them.
  @scala.annotation.tailrec
  def loop(ps: Gen.Parameters, s: Seed, i: Int, n: Int): R[Int] =
    if (i >= trials) {
      r(Some(n), s)
    } else {
      // Named `outcome` rather than `r` so that it does not shadow
      // the `r(...)` result constructor used in the branch above.
      val outcome = test.doPureApply(ps, s)
      // NOTE(review): relies on doPureApply always yielding a value;
      // confirm against its definition.
      val success = outcome.retrieve.get
      // Thread the seed returned by this trial into the next one.
      loop(ps, outcome.seed, i + 1, if (success) n + 1 else n)
    }
  gen((ps, s) => loop(ps, s, 0, 0))
}
1097+
1098+
9501099
/** Generates positive numbers of uniform distribution, with an
9511100
* upper bound of the generation size parameter. */
9521101
def posNum[T](implicit num: Numeric[T], c: Choose[T]): Gen[T] = {

0 commit comments

Comments
 (0)