@@ -96,10 +96,9 @@ def shuffle(self, vals):
96
96
97
97
class RDDSampler (RDDSamplerBase ):
98
98
99
- def __init__ (self , withReplacement , fraction , seed = None , lowbound = 0.0 ):
99
+ def __init__ (self , withReplacement , fraction , seed = None ):
100
100
RDDSamplerBase .__init__ (self , withReplacement , seed )
101
101
self ._fraction = fraction
102
- self ._lowbound = lowbound
103
102
104
103
def func (self , split , iterator ):
105
104
if self ._withReplacement :
@@ -112,10 +111,23 @@ def func(self, split, iterator):
112
111
yield obj
113
112
else :
114
113
for obj in iterator :
115
- if self ._lowbound <= self . getUniformSample (split ) < self ._fraction :
114
+ if self .getUniformSample (split ) <= self ._fraction :
116
115
yield obj
117
116
118
117
118
class RDDRangeSampler(RDDSamplerBase):
    """Sampler that keeps elements whose per-element draw lands in a fixed range.

    For every element, one value is obtained from ``getUniformSample(split)``
    and the element is emitted only when that value lies in the half-open
    interval ``[lowerBound, upperBound)``.
    """

    def __init__(self, lowerBound, upperBound, seed=None):
        """Record the acceptance interval and initialise the base sampler.

        :param lowerBound: inclusive lower end of the acceptance interval.
        :param upperBound: exclusive upper end of the acceptance interval.
        :param seed: forwarded unchanged to ``RDDSamplerBase.__init__``.
        """
        # The base class is always initialised with False as its second
        # positional argument (the slot RDDSampler uses for withReplacement).
        RDDSamplerBase.__init__(self, False, seed)
        self._upperBound = upperBound
        self._lowerBound = lowerBound

    def func(self, split, iterator):
        """Lazily yield the elements of ``iterator`` accepted for ``split``.

        :param split: passed through to ``getUniformSample`` for each element.
        :param iterator: iterable of elements to filter.
        """
        for item in iterator:
            draw = self.getUniformSample(split)
            # Lower bound is inclusive, upper bound is exclusive.
            if not (self._lowerBound <= draw < self._upperBound):
                continue
            yield item
129
+
130
+
119
131
class RDDStratifiedSampler (RDDSamplerBase ):
120
132
121
133
def __init__ (self , withReplacement , fractions , seed = None ):
0 commit comments