Skip to content

Commit fb4787c

Browse files
hhbyyh authored and mengxr committed
[SPARK-6268][MLlib] KMeans parameter getter methods
jira: https://issues.apache.org/jira/browse/SPARK-6268 KMeans has many setters for parameters. It should have matching getters. Author: Yuhao Yang <[email protected]> Closes #4974 from hhbyyh/get4Kmeans and squashes the following commits: f44d4dc [Yuhao Yang] add experimental to getRuns f94a3d7 [Yuhao Yang] add get for KMeans
1 parent 8f1bc79 commit fb4787c

File tree

1 file changed

+37
-0
lines changed
  • mllib/src/main/scala/org/apache/spark/mllib/clustering

1 file changed

+37
-0
lines changed

mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,18 +52,33 @@ class KMeans private (
5252
*/
5353
def this() = this(2, 20, 1, KMeans.K_MEANS_PARALLEL, 5, 1e-4, Utils.random.nextLong())
5454

55+
/**
56+
* Number of clusters to create (k).
57+
*/
58+
def getK: Int = k
59+
5560
/** Set the number of clusters to create (k). Default: 2. */
5661
def setK(k: Int): this.type = {
5762
this.k = k
5863
this
5964
}
6065

66+
/**
67+
* Maximum number of iterations to run.
68+
*/
69+
def getMaxIterations: Int = maxIterations
70+
6171
/** Set maximum number of iterations to run. Default: 20. */
6272
def setMaxIterations(maxIterations: Int): this.type = {
6373
this.maxIterations = maxIterations
6474
this
6575
}
6676

77+
/**
78+
* The initialization algorithm. This can be either "random" or "k-means||".
79+
*/
80+
def getInitializationMode: String = initializationMode
81+
6782
/**
6883
* Set the initialization algorithm. This can be either "random" to choose random points as
6984
* initial cluster centers, or "k-means||" to use a parallel variant of k-means++
@@ -77,6 +92,13 @@ class KMeans private (
7792
this
7893
}
7994

95+
/**
96+
* :: Experimental ::
97+
* Number of runs of the algorithm to execute in parallel.
98+
*/
99+
@Experimental
100+
def getRuns: Int = runs
101+
80102
/**
81103
* :: Experimental ::
82104
* Set the number of runs of the algorithm to execute in parallel. We initialize the algorithm
@@ -92,6 +114,11 @@ class KMeans private (
92114
this
93115
}
94116

117+
/**
118+
* Number of steps for the k-means|| initialization mode.
119+
*/
120+
def getInitializationSteps: Int = initializationSteps
121+
95122
/**
96123
* Set the number of steps for the k-means|| initialization mode. This is an advanced
97124
* setting -- the default of 5 is almost always enough. Default: 5.
@@ -104,6 +131,11 @@ class KMeans private (
104131
this
105132
}
106133

134+
/**
135+
* The distance threshold within which we consider centers to have converged.
136+
*/
137+
def getEpsilon: Double = epsilon
138+
107139
/**
108140
* Set the distance threshold within which we consider centers to have converged.
109141
* If all centers move less than this Euclidean distance, we stop iterating one run.
@@ -113,6 +145,11 @@ class KMeans private (
113145
this
114146
}
115147

148+
/**
149+
* The random seed for cluster initialization.
150+
*/
151+
def getSeed: Long = seed
152+
116153
/** Set the random seed for cluster initialization. */
117154
def setSeed(seed: Long): this.type = {
118155
this.seed = seed

0 commit comments

Comments
 (0)