@@ -19,14 +19,14 @@ package org.apache.spark.mllib.clustering
19
19
20
20
import scala .collection .mutable .ArrayBuffer
21
21
22
- import org .apache .spark .annotation .Experimental
23
22
import org .apache .spark .Logging
24
- import org .apache .spark .SparkContext . _
23
+ import org .apache .spark .annotation . Experimental
25
24
import org .apache .spark .mllib .linalg .{Vector , Vectors }
26
25
import org .apache .spark .mllib .linalg .BLAS .{axpy , scal }
27
26
import org .apache .spark .mllib .util .MLUtils
28
27
import org .apache .spark .rdd .RDD
29
28
import org .apache .spark .storage .StorageLevel
29
+ import org .apache .spark .util .Utils
30
30
import org .apache .spark .util .random .XORShiftRandom
31
31
32
32
/**
@@ -48,9 +48,9 @@ class KMeans private (
48
48
49
49
/**
50
50
* Constructs a KMeans instance with default parameters: {k: 2, maxIterations: 20, runs: 1,
51
- * initializationMode: "k-means||", initializationSteps: 5, epsilon: 1e-4, System.nanoTime() }.
51
+ * initializationMode: "k-means||", initializationSteps: 5, epsilon: 1e-4, seed: random }.
52
52
*/
53
- def this () = this (2 , 20 , 1 , KMeans .K_MEANS_PARALLEL , 5 , 1e-4 , System .nanoTime ())
53
+ def this () = this (2 , 20 , 1 , KMeans .K_MEANS_PARALLEL , 5 , 1e-4 , Utils .random.nextLong ())
54
54
55
55
/** Set the number of clusters to create (k). Default: 2. */
56
56
def setK (k : Int ): this .type = {
@@ -345,17 +345,20 @@ object KMeans {
345
345
* @param maxIterations max number of iterations
346
346
* @param runs number of parallel runs, defaults to 1. The best model is returned.
347
347
* @param initializationMode initialization model, either "random" or "k-means||" (default).
348
+ * @param seed random seed value for cluster initialization
348
349
*/
349
350
def train (
350
351
data : RDD [Vector ],
351
352
k : Int ,
352
353
maxIterations : Int ,
353
354
runs : Int ,
354
- initializationMode : String ): KMeansModel = {
355
+ initializationMode : String ,
356
+ seed : Long ): KMeansModel = {
355
357
new KMeans ().setK(k)
356
358
.setMaxIterations(maxIterations)
357
359
.setRuns(runs)
358
360
.setInitializationMode(initializationMode)
361
+ .setSeed(seed)
359
362
.run(data)
360
363
}
361
364
@@ -367,20 +370,17 @@ object KMeans {
367
370
* @param maxIterations max number of iterations
368
371
* @param runs number of parallel runs, defaults to 1. The best model is returned.
369
372
* @param initializationMode initialization model, either "random" or "k-means||" (default).
370
- * @param seed random seed value for cluster initialization
371
373
*/
372
374
def train (
373
375
data : RDD [Vector ],
374
376
k : Int ,
375
377
maxIterations : Int ,
376
378
runs : Int ,
377
- initializationMode : String ,
378
- seed : Long ): KMeansModel = {
379
+ initializationMode : String ): KMeansModel = {
379
380
new KMeans ().setK(k)
380
381
.setMaxIterations(maxIterations)
381
382
.setRuns(runs)
382
383
.setInitializationMode(initializationMode)
383
- .setSeed(seed)
384
384
.run(data)
385
385
}
386
386
0 commit comments