@@ -131,16 +131,35 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
131
131
132
132
/**
133
133
* Return a subset of this RDD sampled by key (via stratified sampling).
134
+ *
135
+ * Create a sample of this RDD using variable sampling rates for different keys as specified by
136
+ * `fractions`, a key to sampling rate map.
137
+ *
138
+ * If `exact` is set to false, create the sample via simple random sampling, with one pass
139
+ * over the RDD, to produce a sample whose size is approximately equal to the sum of
140
+ * math.ceil(numItems * samplingRate) over all key values; otherwise, use additional passes over
141
+ * the RDD to create a sample whose size is exactly equal to the sum of
142
+ * math.ceil(numItems * samplingRate) over all key values.
134
143
*/
135
144
def sampleByKey (withReplacement : Boolean ,
136
145
fractions : JMap [K , Double ],
137
146
exact : Boolean ,
138
147
seed : Long ): JavaPairRDD [K , V ] =
139
148
// Forward all four arguments unchanged to the wrapped `rdd`'s sampleByKey, then re-wrap the
// sampled result in a new JavaPairRDD so callers stay on the Java-facing type.
// NOTE(review): `fractions` is handed over as a JMap; whether a Java-to-Scala conversion is
// applied at this call is not visible from here -- confirm against the file's imports.
new JavaPairRDD [K , V ](rdd.sampleByKey(withReplacement, fractions, exact, seed))
140
149
141
-
142
150
/**
143
151
* Return a subset of this RDD sampled by key (via stratified sampling).
152
+ *
153
+ * Create a sample of this RDD using variable sampling rates for different keys as specified by
154
+ * `fractions`, a key to sampling rate map.
155
+ *
156
+ * If `exact` is set to false, create the sample via simple random sampling, with one pass
157
+ * over the RDD, to produce a sample whose size is approximately equal to the sum of
158
+ * math.ceil(numItems * samplingRate) over all key values; otherwise, use additional passes over
159
+ * the RDD to create a sample whose size is exactly equal to the sum of
160
+ * math.ceil(numItems * samplingRate) over all key values.
161
+ *
162
+ * Use Utils.random.nextLong as the default seed for the random number generator.
144
163
*/
145
164
def sampleByKey (withReplacement : Boolean ,
146
165
fractions : JMap [K , Double ],
@@ -149,17 +168,33 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
149
168
150
169
/**
151
170
* Return a subset of this RDD sampled by key (via stratified sampling).
171
+ *
172
+ * Create a sample of this RDD using variable sampling rates for different keys as specified by
173
+ * `fractions`, a key to sampling rate map.
174
+ *
175
+ * Produce a sample whose size is approximately equal to the sum of
176
+ * math.ceil(numItems * samplingRate) over all key values with one pass over the RDD via
177
+ * simple random sampling.
152
178
*/
153
179
def sampleByKey (withReplacement : Boolean ,
154
180
fractions : JMap [K , Double ],
155
181
seed : Long ): JavaPairRDD [K , V ] =
// Delegates to the four-argument overload with a hard-coded `exact` flag. The removed (-)
// line passed true; the added (+) line passes false, which matches the one-pass,
// approximate-size sampling described in this overload's doc comment.
156
- sampleByKey(withReplacement, fractions, true , seed)
182
+ sampleByKey(withReplacement, fractions, false , seed)
157
183
158
184
/**
159
185
* Return a subset of this RDD sampled by key (via stratified sampling).
186
+ *
187
+ * Create a sample of this RDD using variable sampling rates for different keys as specified by
188
+ * `fractions`, a key to sampling rate map.
189
+ *
190
+ * Produce a sample whose size is approximately equal to the sum of
191
+ * math.ceil(numItems * samplingRate) over all key values with one pass over the RDD via
192
+ * simple random sampling.
193
+ *
194
+ * Use Utils.random.nextLong as the default seed for the random number generator.
160
195
*/
161
196
def sampleByKey (withReplacement : Boolean , fractions : JMap [K , Double ]): JavaPairRDD [K , V ] =
// Delegates to the four-argument overload, supplying Utils.random.nextLong as the seed and a
// hard-coded `exact` flag. The removed (-) line passed true; the added (+) line passes false,
// which matches the one-pass, approximate-size sampling described in the doc comment.
162
- sampleByKey(withReplacement, fractions, true , Utils .random.nextLong)
197
+ sampleByKey(withReplacement, fractions, false , Utils .random.nextLong)
163
198
164
199
/**
165
200
* Return the union of this RDD and another one. Any identical elements will appear multiple
0 commit comments