use non-sparse implementation when k = n

Li Pu · Li Pu · commit 9c8051594a88 · 2014-06-04T01:25:58.000-07:00
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
@@ -246,6 +246,9 @@ class RowMatrix(
    * Then we compute U via easy matrix multiplication as U =  A * (V * S-1).
    * Note that this approach requires `O(nnz(A))` time.
    *
+   * When k = n (i.e. all n singular values are requested), a non-sparse implementation is used,
+   * which requires `n^2` doubles to fit in memory and `O(n^3)` time on the master node.
+   *
    * At most k largest non-zero singular values and associated vectors are returned.
    * If there are k such values, then the dimensions of the return will be:
    *
@@ -269,8 +272,16 @@ class RowMatrix(
     val n = numCols().toInt
     require(k > 0 && k <= n, s"Request up to n singular values k=$k n=$n.")
 
-    val (sigmaSquares: BDV[Double], u: BDM[Double]) =
+    val (sigmaSquares: BDV[Double], u: BDM[Double]) = if (k < n) {
       EigenValueDecomposition.symmetricEigs(multiplyGramianMatrix, n, k, tol)
+    } else {
+      logWarning(s"Request full SVD (k = n = $k), while ARPACK requires k strictly less than n. " +
+          s"Using non-sparse implementation.")
+      val G = computeGramianMatrix()
+      val (uFull: BDM[Double], sigmaSquaresFull: BDV[Double], vFull: BDM[Double]) =
+        brzSvd(G.toBreeze.asInstanceOf[BDM[Double]])
+      (sigmaSquaresFull, uFull)
+    }
     val sigmas: BDV[Double] = brzSqrt(sigmaSquares)
 
     // Determine effective rank.
@@ -508,4 +519,4 @@ object RowMatrix {
 
     Matrices.dense(n, n, G.data)
   }
-}
+}
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala
@@ -99,7 +99,7 @@ class RowMatrixSuite extends FunSuite with LocalSparkContext {
       val localMat = mat.toBreeze()
       val (localU, localSigma, localVt) = brzSvd(localMat)
       val localV: BDM[Double] = localVt.t.toDenseMatrix
-      for (k <- 1 to (n - 1)) {
+      for (k <- 1 to n) {
         val svd = mat.computeSVD(k, computeU = true)
         val U = svd.U
         val s = svd.s