|
| 1 | +/* |
| 2 | + * Licensed to the Apache Software Foundation (ASF) under one or more |
| 3 | + * contributor license agreements. See the NOTICE file distributed with |
| 4 | + * this work for additional information regarding copyright ownership. |
| 5 | + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| 6 | + * (the "License"); you may not use this file except in compliance with |
| 7 | + * the License. You may obtain a copy of the License at |
| 8 | + * |
| 9 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | + * |
| 11 | + * Unless required by applicable law or agreed to in writing, software |
| 12 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | + * See the License for the specific language governing permissions and |
| 15 | + * limitations under the License. |
| 16 | + */ |
| 17 | + |
| 18 | +package org.apache.spark.mllib.linalg |
| 19 | + |
| 20 | +import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV} |
| 21 | +import com.github.fommil.netlib.ARPACK |
| 22 | +import org.netlib.util.{intW, doubleW} |
| 23 | + |
| 24 | +import org.apache.spark.annotation.Experimental |
| 25 | + |
| 26 | +/** |
| 27 | + * :: Experimental :: |
| 28 | + * Compute eigen-decomposition. |
| 29 | + */ |
| 30 | +@Experimental |
| 31 | +private[mllib] object EigenValueDecomposition { |
| 32 | + /** |
| 33 | + * Compute the leading k eigenvalues and eigenvectors on a symmetric square matrix using ARPACK. |
| 34 | + * The caller needs to ensure that the input matrix is real symmetric. This function requires |
| 35 | + * memory for `n*(4*k+4)` doubles. |
| 36 | + * |
| 37 | + * @param mul a function that multiplies the symmetric matrix with a DenseVector. |
| 38 | + * @param n dimension of the square matrix (maximum Int.MaxValue). |
| 39 | + * @param k number of leading eigenvalues required, 0 < k < n. |
| 40 | + * @param tol tolerance of the eigs computation. |
| 41 | + * @param maxIterations the maximum number of Arnoldi update iterations. |
| 42 | + * @return a dense vector of eigenvalues in descending order and a dense matrix of eigenvectors |
| 43 | + * (columns of the matrix). |
| 44 | + * @note The number of computed eigenvalues might be smaller than k when some Ritz values do not |
| 45 | + * satisfy the convergence criterion specified by tol (see ARPACK Users Guide, Chapter 4.6 |
| 46 | + * for more details). The maximum number of Arnoldi update iterations is set to 300 in this |
| 47 | + * function. |
| 48 | + */ |
| 49 | + private[mllib] def symmetricEigs( |
| 50 | + mul: BDV[Double] => BDV[Double], |
| 51 | + n: Int, |
| 52 | + k: Int, |
| 53 | + tol: Double, |
| 54 | + maxIterations: Int): (BDV[Double], BDM[Double]) = { |
| 55 | + // TODO: remove this function and use eigs in breeze when switching breeze version |
| 56 | + require(n > k, s"Number of required eigenvalues $k must be smaller than matrix dimension $n") |
| 57 | + |
| 58 | + val arpack = ARPACK.getInstance() |
| 59 | + |
| 60 | + // tolerance used in stopping criterion |
| 61 | + val tolW = new doubleW(tol) |
| 62 | + // number of desired eigenvalues, 0 < nev < n |
| 63 | + val nev = new intW(k) |
| 64 | + // nev Lanczos vectors are generated in the first iteration |
| 65 | + // ncv-nev Lanczos vectors are generated in each subsequent iteration |
| 66 | + // ncv must be smaller than n |
| 67 | + val ncv = math.min(2 * k, n) |
| 68 | + |
| 69 | + // "I" for standard eigenvalue problem, "G" for generalized eigenvalue problem |
| 70 | + val bmat = "I" |
| 71 | + // "LM" : compute the NEV largest (in magnitude) eigenvalues |
| 72 | + val which = "LM" |
| 73 | + |
| 74 | + var iparam = new Array[Int](11) |
| 75 | + // use exact shift in each iteration |
| 76 | + iparam(0) = 1 |
| 77 | + // maximum number of Arnoldi update iterations, or the actual number of iterations on output |
| 78 | + iparam(2) = maxIterations |
| 79 | + // Mode 1: A*x = lambda*x, A symmetric |
| 80 | + iparam(6) = 1 |
| 81 | + |
| 82 | + var ido = new intW(0) |
| 83 | + var info = new intW(0) |
| 84 | + var resid = new Array[Double](n) |
| 85 | + var v = new Array[Double](n * ncv) |
| 86 | + var workd = new Array[Double](n * 3) |
| 87 | + var workl = new Array[Double](ncv * (ncv + 8)) |
| 88 | + var ipntr = new Array[Int](11) |
| 89 | + |
| 90 | + // call ARPACK's reverse communication, first iteration with ido = 0 |
| 91 | + arpack.dsaupd(ido, bmat, n, which, nev.`val`, tolW, resid, ncv, v, n, iparam, ipntr, workd, |
| 92 | + workl, workl.length, info) |
| 93 | + |
| 94 | + val w = BDV(workd) |
| 95 | + |
| 96 | + // ido = 99 : done flag in reverse communication |
| 97 | + while (ido.`val` != 99) { |
| 98 | + if (ido.`val` != -1 && ido.`val` != 1) { |
| 99 | + throw new IllegalStateException("ARPACK returns ido = " + ido.`val` + |
| 100 | + " This flag is not compatible with Mode 1: A*x = lambda*x, A symmetric.") |
| 101 | + } |
| 102 | + // multiply working vector with the matrix |
| 103 | + val inputOffset = ipntr(0) - 1 |
| 104 | + val outputOffset = ipntr(1) - 1 |
| 105 | + val x = w.slice(inputOffset, inputOffset + n) |
| 106 | + val y = w.slice(outputOffset, outputOffset + n) |
| 107 | + y := mul(x) |
| 108 | + // call ARPACK's reverse communication |
| 109 | + arpack.dsaupd(ido, bmat, n, which, nev.`val`, tolW, resid, ncv, v, n, iparam, ipntr, |
| 110 | + workd, workl, workl.length, info) |
| 111 | + } |
| 112 | + |
| 113 | + if (info.`val` != 0) { |
| 114 | + info.`val` match { |
| 115 | + case 1 => throw new IllegalStateException("ARPACK returns non-zero info = " + info.`val` + |
| 116 | + " Maximum number of iterations taken. (Refer ARPACK user guide for details)") |
| 117 | + case 2 => throw new IllegalStateException("ARPACK returns non-zero info = " + info.`val` + |
| 118 | + " No shifts could be applied. Try to increase NCV. " + |
| 119 | + "(Refer ARPACK user guide for details)") |
| 120 | + case _ => throw new IllegalStateException("ARPACK returns non-zero info = " + info.`val` + |
| 121 | + " Please refer ARPACK user guide for error message.") |
| 122 | + } |
| 123 | + } |
| 124 | + |
| 125 | + val d = new Array[Double](nev.`val`) |
| 126 | + val select = new Array[Boolean](ncv) |
| 127 | + // copy the Ritz vectors |
| 128 | + val z = java.util.Arrays.copyOfRange(v, 0, nev.`val` * n) |
| 129 | + |
| 130 | + // call ARPACK's post-processing for eigenvectors |
| 131 | + arpack.dseupd(true, "A", select, d, z, n, 0.0, bmat, n, which, nev, tol, resid, ncv, v, n, |
| 132 | + iparam, ipntr, workd, workl, workl.length, info) |
| 133 | + |
| 134 | + // number of computed eigenvalues, might be smaller than k |
| 135 | + val computed = iparam(4) |
| 136 | + |
| 137 | + val eigenPairs = java.util.Arrays.copyOfRange(d, 0, computed).zipWithIndex.map { r => |
| 138 | + (r._1, java.util.Arrays.copyOfRange(z, r._2 * n, r._2 * n + n)) |
| 139 | + } |
| 140 | + |
| 141 | + // sort the eigen-pairs in descending order |
| 142 | + val sortedEigenPairs = eigenPairs.sortBy(- _._1) |
| 143 | + |
| 144 | + // copy eigenvectors in descending order of eigenvalues |
| 145 | + val sortedU = BDM.zeros[Double](n, computed) |
| 146 | + sortedEigenPairs.zipWithIndex.foreach { r => |
| 147 | + val b = r._2 * n |
| 148 | + var i = 0 |
| 149 | + while (i < n) { |
| 150 | + sortedU.data(b + i) = r._1._2(i) |
| 151 | + i += 1 |
| 152 | + } |
| 153 | + } |
| 154 | + |
| 155 | + (BDV[Double](sortedEigenPairs.map(_._1)), sortedU) |
| 156 | + } |
| 157 | +} |
0 commit comments