@@ -19,7 +19,8 @@ package org.apache.spark.mllib.linalg.distributed
 
 import java.util
 
-import breeze.linalg.{Vector => BV, DenseMatrix => BDM, DenseVector => BDV, svd => brzSvd}
+import breeze.linalg.{Vector => BV, DenseMatrix => BDM, DenseVector => BDV, SparseVector => BSV}
+import breeze.linalg.{svd => brzSvd, axpy => brzAxpy}
 import breeze.numerics.{sqrt => brzSqrt}
 import com.github.fommil.netlib.BLAS.{getInstance => blas}
 
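The newly imported `axpy` is what lets the rewritten aggregation below accumulate scaled rows into one buffer in place. A minimal sketch of its semantics, assuming the same Breeze aliases used above (illustrative only, not part of the patch):

import breeze.linalg.{DenseVector => BDV, axpy => brzAxpy}

val x = BDV(1.0, 2.0, 3.0)
val y = BDV(10.0, 10.0, 10.0)
brzAxpy(2.0, x, y)  // in place: y := 2.0 * x + y, so y is now DenseVector(12.0, 14.0, 16.0)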
@@ -201,16 +202,28 @@ class RowMatrix(
   }
 
   /**
-   * Multiply the Gramian matrix `A^T A` by a Vector on the right.
+   * Multiply the Gramian matrix `A^T A` by a DenseVector on the right.
    *
-   * @param v a local vector whose length must match the number of columns of this matrix
-   * @return a local DenseVector representing the product
+   * @param v a local DenseVector whose length must match the number of columns of this matrix.
+   * @return a local DenseVector representing the product.
    */
-  private[mllib] def multiplyGramianMatrix(v: Vector): Vector = {
-    val bv = rows.map {
-      row => row.toBreeze * row.toBreeze.dot(v.toBreeze)
-    }.reduce((x: BV[Double], y: BV[Double]) => x + y)
-    Vectors.fromBreeze(bv)
+  private[mllib] def multiplyGramianMatrix(v: DenseVector): DenseVector = {
+    val n = numCols().toInt
+
+    val bv = rows.aggregate(BDV.zeros[Double](n))(
+      seqOp = (U, r) => {
+        val rBrz = r.toBreeze
+        val a = rBrz.dot(v.toBreeze)
+        rBrz match {
+          case _: BDV[_] => brzAxpy(a, rBrz.asInstanceOf[BDV[Double]], U)
+          case _: BSV[_] => brzAxpy(a, rBrz.asInstanceOf[BSV[Double]], U)
+        }
+        U
+      },
+      combOp = (U1, U2) => U1 += U2
+    )
+
+    new DenseVector(bv.data)
   }
 
   /**
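The rewritten method replaces the map/reduce over per-row contributions with a single `aggregate` that adds (r . v) * r into one preallocated dense buffer per partition, handling both dense and sparse rows via `axpy`. A local Breeze sketch of the identity it relies on, using hypothetical toy data rather than anything from the patch:

import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV}

val a = BDM((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))  // toy 3x2 matrix A with rows r_i
val v = BDV(1.0, -1.0)
val direct = a.t * (a * v)                        // (A^T A) v computed directly
val rowWise = (0 until a.rows)
  .map(i => a(i, ::).t * (a(i, ::).t dot v))      // (r_i . v) * r_i, what seqOp adds via axpy
  .reduce(_ + _)                                   // what combOp merges across partitions
// direct and rowWise are equal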
@@ -243,7 +256,7 @@ class RowMatrix(
    *
    * The decomposition is computed by providing a function that multiples a vector with A'A to
    * ARPACK, and iteratively invoking ARPACK-dsaupd on master node, from which we recover S and V.
-   * Then we compute U via easy matrix multiplication as U = A * (V * S-1).
+   * Then we compute U via easy matrix multiplication as U = A * (V * S^{-1}).
    * Note that this approach requires `O(nnz(A))` time.
    *
    * When the requested eigenvalues k = n, a non-sparse implementation will be used, which requires
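For the U = A * (V * S^{-1}) step mentioned in the comment, here is a small local Breeze sketch of the arithmetic; the real code applies it to the distributed RowMatrix, and the names below are illustrative only:

import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, diag}

def computeU(a: BDM[Double], v: BDM[Double], s: BDV[Double]): BDM[Double] = {
  val vScaled = v * diag(s.map(1.0 / _))  // scale column j of V by 1 / s(j), i.e. V * S^{-1}
  a * vScaled                              // U = A * (V * S^{-1})
}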