1 file changed: +16 -10 lines changed
Original file line number | Diff line number | Diff line change
17
17
18
18
import sys
19
19
import random
20
+ import math
20
21
21
22
22
23
class RDDSamplerBase (object ):
@@ -37,16 +38,21 @@ def getUniformSample(self):
37
38
return self ._random .random ()
38
39
39
40
def getPoissonSample (self , mean ):
40
- # here we simulate drawing numbers n_i ~ Poisson(lambda = 1/mean) by
41
- # drawing a sequence of numbers delta_j ~ Exp(mean)
42
- num_arrivals = 0
43
- cur_time = self ._random .expovariate (mean )
44
-
45
- while cur_time < 1.0 :
46
- cur_time += self ._random .expovariate (mean )
47
- num_arrivals += 1
48
-
49
- return num_arrivals
41
+ # Using Knuth's algorithm described in http://en.wikipedia.org/wiki/Poisson_distribution
42
+ if mean < 20.0 : # one exp and k+1 random calls
43
+ l = math .exp (- mean )
44
+ p = self ._random .random ()
45
+ k = 0
46
+ while p > l :
47
+ k += 1
48
+ p *= self ._random .random ()
49
+ else : # switch to the log domain, k+1 expovariate (random + log) calls
50
+ p = self ._random .expovariate (mean )
51
+ k = 0
52
+ while p < 1.0 :
53
+ k += 1
54
+ p += self ._random .expovariate (mean )
55
+ return k
50
56
51
57
def func (self , split , iterator ):
52
58
raise NotImplementedError
You can’t perform that action at this time.
0 commit comments