
SPARK-8918 Add @since tags to mllib.clustering #8256

Closed · wants to merge 3 commits
mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
@@ -43,6 +43,7 @@ import org.apache.spark.sql.{SQLContext, Row}
* the weight for Gaussian i, and weights.sum == 1
* @param gaussians Array of MultivariateGaussian where gaussians(i) represents
* the Multivariate Gaussian (Normal) Distribution for Gaussian i
* @since 1.3.0
*/
@Experimental
class GaussianMixtureModel(
@@ -53,32 +54,48 @@ class GaussianMixtureModel(

override protected def formatVersion = "1.0"

/**
* @since 1.4.0
*/
override def save(sc: SparkContext, path: String): Unit = {
GaussianMixtureModel.SaveLoadV1_0.save(sc, path, weights, gaussians)
}

/** Number of gaussians in mixture */
/**
* Number of gaussians in mixture
* @since 1.3.0
*/
def k: Int = weights.length

/** Maps given points to their cluster indices. */
/**
* Maps given points to their cluster indices.
* @since 1.3.0
*/
def predict(points: RDD[Vector]): RDD[Int] = {
val responsibilityMatrix = predictSoft(points)
responsibilityMatrix.map(r => r.indexOf(r.max))
}

/** Maps given point to its cluster index. */
/**
* Maps given point to its cluster index.
* @since 1.4.0
*/
def predict(point: Vector): Int = {
val r = computeSoftAssignments(point.toBreeze.toDenseVector, gaussians, weights, k)
r.indexOf(r.max)
}

/** Java-friendly version of [[predict()]] */
/**
* Java-friendly version of [[predict()]]
* @since 1.4.0
*/
def predict(points: JavaRDD[Vector]): JavaRDD[java.lang.Integer] =
predict(points.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Integer]]

/**
* Given the input vectors, return the membership value of each vector
* to all mixture components.
* @since 1.3.0
*/
def predictSoft(points: RDD[Vector]): RDD[Array[Double]] = {
val sc = points.sparkContext
@@ -91,6 +108,7 @@ class GaussianMixtureModel(

/**
* Given the input vector, return the membership values to all mixture components.
* @since 1.4.0
*/
def predictSoft(point: Vector): Array[Double] = {
computeSoftAssignments(point.toBreeze.toDenseVector, gaussians, weights, k)
@@ -115,6 +133,9 @@ class GaussianMixtureModel(
}
}

/**
* @since 1.4.0
*/
@Experimental
object GaussianMixtureModel extends Loader[GaussianMixtureModel] {

@@ -147,6 +168,9 @@ object GaussianMixtureModel extends Loader[GaussianMixtureModel] {
sc.parallelize(dataArray, 1).toDF().write.parquet(Loader.dataPath(path))
}

/**
* @since 1.4.0
*/
def load(sc: SparkContext, path: String): GaussianMixtureModel = {
val dataPath = Loader.dataPath(path)
val sqlContext = new SQLContext(sc)
@@ -165,6 +189,9 @@ object GaussianMixtureModel extends Loader[GaussianMixtureModel] {
}
}

/**
* @since 1.4.0
*/
override def load(sc: SparkContext, path: String) : GaussianMixtureModel = {
val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
implicit val formats = DefaultFormats
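For reference, here is a minimal usage sketch of the `GaussianMixtureModel` API these tags annotate. It assumes an existing `SparkContext` named `sc`, toy two-blob data, and an illustrative save path; `GaussianMixture` is the companion estimator that produces the model.

```scala
import org.apache.spark.mllib.clustering.{GaussianMixture, GaussianMixtureModel}
import org.apache.spark.mllib.linalg.Vectors

// Toy data: two well-separated blobs (illustrative only).
val data = sc.parallelize(Seq(
  Vectors.dense(0.0, 0.1), Vectors.dense(0.1, 0.0),
  Vectors.dense(9.0, 9.1), Vectors.dense(9.1, 9.0)))

// Fit a two-component mixture.
val model: GaussianMixtureModel = new GaussianMixture().setK(2).run(data)

// Hard assignments (predict, since 1.3.0) and per-component
// responsibilities (predictSoft, since 1.3.0).
val clusters = model.predict(data)
val responsibilities = model.predictSoft(data)

// Round-trip through the save/load pair tagged @since 1.4.0 above.
model.save(sc, "/tmp/gmm-model") // path is illustrative
val reloaded = GaussianMixtureModel.load(sc, "/tmp/gmm-model")
```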
mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
@@ -430,11 +430,14 @@ class KMeans private (

/**
* Top-level methods for calling K-means clustering.
* @since 0.8.0
*/
object KMeans {

// Initialization mode names
/** @since 0.8.0 */
val RANDOM = "random"
/** @since 0.8.0 */
val K_MEANS_PARALLEL = "k-means||"

/**
@@ -446,6 +449,7 @@ object KMeans {
* @param runs number of parallel runs, defaults to 1. The best model is returned.
* @param initializationMode initialization model, either "random" or "k-means||" (default).
* @param seed random seed value for cluster initialization
* @since 1.3.0
*/
def train(
data: RDD[Vector],
@@ -470,6 +474,7 @@ object KMeans {
* @param maxIterations max number of iterations
* @param runs number of parallel runs, defaults to 1. The best model is returned.
* @param initializationMode initialization model, either "random" or "k-means||" (default).
* @since 0.8.0
*/
def train(
data: RDD[Vector],
@@ -486,6 +491,7 @@ object KMeans {

/**
* Trains a k-means model using specified parameters and the default values for unspecified.
* @since 0.8.0
*/
def train(
data: RDD[Vector],
@@ -496,6 +502,7 @@ object KMeans {

/**
* Trains a k-means model using specified parameters and the default values for unspecified.
* @since 0.8.0
*/
def train(
data: RDD[Vector],
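A quick sketch of the `KMeans.train` overloads tagged above, again assuming an existing `SparkContext` named `sc` and toy data. The `seed` parameter name comes straight from the Scaladoc in this diff.

```scala
import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.mllib.linalg.Vectors

val points = sc.parallelize(Seq(
  Vectors.dense(0.0, 0.0), Vectors.dense(0.0, 0.1),
  Vectors.dense(9.0, 9.0), Vectors.dense(9.0, 9.1)))

// Simplest overload (since 0.8.0): data, k, maxIterations.
val model = KMeans.train(points, 2, 20)

// Full overload (since 1.3.0) adds runs, the initialization mode
// constants tagged above, and a seed for reproducible centers.
val seeded = KMeans.train(points, 2, 20, 1, KMeans.K_MEANS_PARALLEL, seed = 42L)
```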
mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
@@ -34,35 +34,52 @@ import org.apache.spark.sql.Row

/**
* A clustering model for K-means. Each point belongs to the cluster with the closest center.
* @since 0.8.0
*/
class KMeansModel (
val clusterCenters: Array[Vector]) extends Saveable with Serializable with PMMLExportable {

/** A Java-friendly constructor that takes an Iterable of Vectors. */
/**
* A Java-friendly constructor that takes an Iterable of Vectors.
* @since 1.4.0
*/
def this(centers: java.lang.Iterable[Vector]) = this(centers.asScala.toArray)

/** Total number of clusters. */
/**
* Total number of clusters.
* @since 0.8.0
*/
def k: Int = clusterCenters.length

/** Returns the cluster index that a given point belongs to. */
/**
* Returns the cluster index that a given point belongs to.
* @since 0.8.0
*/
def predict(point: Vector): Int = {
KMeans.findClosest(clusterCentersWithNorm, new VectorWithNorm(point))._1
}

/** Maps given points to their cluster indices. */
/**
* Maps given points to their cluster indices.
* @since 1.0.0
*/
def predict(points: RDD[Vector]): RDD[Int] = {
val centersWithNorm = clusterCentersWithNorm
val bcCentersWithNorm = points.context.broadcast(centersWithNorm)
points.map(p => KMeans.findClosest(bcCentersWithNorm.value, new VectorWithNorm(p))._1)
}

/** Maps given points to their cluster indices. */
/**
* Maps given points to their cluster indices.
* @since 1.0.0
*/
def predict(points: JavaRDD[Vector]): JavaRDD[java.lang.Integer] =
predict(points.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Integer]]

/**
* Return the K-means cost (sum of squared distances of points to their nearest center) for this
* model on the given data.
* @since 0.8.0
*/
def computeCost(data: RDD[Vector]): Double = {
val centersWithNorm = clusterCentersWithNorm
@@ -73,13 +90,20 @@ class KMeansModel (
private def clusterCentersWithNorm: Iterable[VectorWithNorm] =
clusterCenters.map(new VectorWithNorm(_))

/**
* @since 1.4.0
*/
override def save(sc: SparkContext, path: String): Unit = {
KMeansModel.SaveLoadV1_0.save(sc, this, path)
}

/** @since 1.4.0 */
override protected def formatVersion: String = "1.0"
}

/**
* @since 1.4.0
*/
object KMeansModel extends Loader[KMeansModel] {
override def load(sc: SparkContext, path: String): KMeansModel = {
KMeansModel.SaveLoadV1_0.load(sc, path)
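Finally, a sketch exercising the `KMeansModel` methods annotated above (same assumptions: an existing `sc`, toy data, and an illustrative path).

```scala
import org.apache.spark.mllib.clustering.{KMeans, KMeansModel}
import org.apache.spark.mllib.linalg.Vectors

val data = sc.parallelize(Seq(
  Vectors.dense(0.0, 0.1), Vectors.dense(9.0, 9.1)))
val model: KMeansModel = KMeans.train(data, 2, 20)

// Single-point (since 0.8.0) and RDD (since 1.0.0) predictions.
val cluster = model.predict(Vectors.dense(0.2, 0.2))
val clusters = model.predict(data)

// Sum of squared distances to the nearest centers (since 0.8.0).
val cost = model.computeCost(data)

// save/load are the 1.4.0 additions tagged above.
model.save(sc, "/tmp/kmeans-model") // path is illustrative
val sameModel = KMeansModel.load(sc, "/tmp/kmeans-model")
```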