
Commit f69bb3d

Addressing reviewers' comments.
1 parent 374bea6 commit f69bb3d

6 files changed: 106 additions, 87 deletions


mllib/src/main/scala/org/apache/spark/ml/ann/BreezeUtil.scala

Lines changed: 5 additions & 9 deletions
@@ -23,8 +23,9 @@ import com.github.fommil.netlib.BLAS.{getInstance => NativeBLAS}
 /**
  * In-place DGEMM and DGEMV for Breeze
  */
-object BreezeUtil {
+private[ann] object BreezeUtil {
 
+  // TODO: switch to MLlib BLAS interface
   private def transposeString(a: BDM[Double]): String = if (a.isTranspose) "T" else "N"
 
   /**
@@ -40,12 +41,9 @@ object BreezeUtil {
     require(a.cols == b.rows, "A & B Dimension mismatch!")
     require(a.rows == c.rows, "A & C Dimension mismatch!")
     require(b.cols == c.cols, "A & C Dimension mismatch!")
-    if(a.rows == 0 || b.rows == 0 || a.cols == 0 || b.cols == 0) {
-    } else {
-      NativeBLAS.dgemm(transposeString(a), transposeString(b), c.rows, c.cols, a.cols,
-        alpha, a.data, a.offset, a.majorStride, b.data, b.offset, b.majorStride,
-        beta, c.data, c.offset, c.rows)
-    }
+    NativeBLAS.dgemm(transposeString(a), transposeString(b), c.rows, c.cols, a.cols,
+      alpha, a.data, a.offset, a.majorStride, b.data, b.offset, b.majorStride,
+      beta, c.data, c.offset, c.rows)
   }
 
   /**
@@ -57,9 +55,7 @@ object BreezeUtil {
    * @param y y
    */
   def dgemv(alpha: Double, a: BDM[Double], x: BDV[Double], beta: Double, y: BDV[Double]): Unit = {
-
     require(a.cols == x.length, "A & b Dimension mismatch!")
-
     NativeBLAS.dgemv(transposeString(a), a.rows, a.cols,
       alpha, a.data, a.offset, a.majorStride, x.data, x.offset, x.stride,
       beta, y.data, y.offset, y.stride)
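
Note: a minimal usage sketch, not part of the commit, showing what the in-place wrapper computes. It assumes the dgemm signature (alpha, a, b, beta, c) implied by the requires and by the dgemv signature above, and it would have to live in org.apache.spark.ml.ann now that BreezeUtil is private[ann]. Dropping the empty-matrix branch is safe because BLAS dgemm already treats zero-sized dimensions as a quick return.

package org.apache.spark.ml.ann

import breeze.linalg.{DenseMatrix => BDM}

object BreezeUtilSketch {
  def main(args: Array[String]): Unit = {
    val a = new BDM(2, 2, Array(1.0, 2.0, 3.0, 4.0)) // column-major: [[1, 3], [2, 4]]
    val b = BDM.eye[Double](2)                       // 2x2 identity
    val c = BDM.zeros[Double](2, 2)
    BreezeUtil.dgemm(1.0, a, b, 0.0, c)              // c := 1.0 * a * b + 0.0 * c
    println(c)                                       // c is now a copy of a
  }
}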

mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala

Lines changed: 61 additions & 37 deletions
@@ -17,9 +17,10 @@
 
 package org.apache.spark.ml.ann
 
-import breeze.linalg.{*, DenseMatrix => BDM, DenseVector => BDV, Vector => BV, axpy => brzAxpy,
-  sum => Bsum}
+import breeze.linalg.{*, DenseMatrix => BDM, DenseVector => BDV, Vector => BV, axpy => Baxpy,
+  sum => Bsum}
 import breeze.numerics.{log => Blog, sigmoid => Bsigmoid}
+
 import org.apache.spark.mllib.linalg.{Vector, Vectors}
 import org.apache.spark.mllib.optimization._
 import org.apache.spark.rdd.RDD
@@ -177,8 +178,11 @@ private[ann] object AffineLayerModel {
    * @param numOut number of layer outputs
    * @return matrix A and vector b
    */
-  def unroll(weights: Vector, position: Int,
-    numIn: Int, numOut: Int): (BDM[Double], BDV[Double]) = {
+  def unroll(
+      weights: Vector,
+      position: Int,
+      numIn: Int,
+      numOut: Int): (BDM[Double], BDV[Double]) = {
     val weightsCopy = weights.toArray
     // TODO: the array is not copied to BDMs, make sure this is OK!
     val a = new BDM[Double](numOut, numIn, weightsCopy, position)
@@ -272,8 +276,11 @@ private[ann] object ActivationFunction {
     }
   }
 
-  def apply(x1: BDM[Double], x2: BDM[Double], y: BDM[Double],
-    func: (Double, Double) => Double): Unit = {
+  def apply(
+      x1: BDM[Double],
+      x2: BDM[Double],
+      y: BDM[Double],
+      func: (Double, Double) => Double): Unit = {
     var i = 0
     while (i < x1.rows) {
       var j = 0
@@ -284,7 +291,6 @@ private[ann] object ActivationFunction {
       i += 1
     }
   }
-
 }
 
 /**
@@ -320,8 +326,10 @@ private[ann] class SoftmaxFunction extends ActivationFunction {
     }
   }
 
-  override def crossEntropy(output: BDM[Double], target: BDM[Double],
-    result: BDM[Double]): Double = {
+  override def crossEntropy(
+      output: BDM[Double],
+      target: BDM[Double],
+      result: BDM[Double]): Double = {
     def m(o: Double, t: Double): Double = o - t
     ActivationFunction(output, target, result, m)
     -Bsum( target :* Blog(output)) / output.cols
@@ -346,11 +354,13 @@ private[ann] class SigmoidFunction extends ActivationFunction {
     ActivationFunction(x, y, s)
   }
 
-  override def crossEntropy(output: BDM[Double], target: BDM[Double],
-    result: BDM[Double]): Double = {
+  override def crossEntropy(
+      output: BDM[Double],
+      target: BDM[Double],
+      result: BDM[Double]): Double = {
     def m(o: Double, t: Double): Double = o - t
     ActivationFunction(output, target, result, m)
-    -Bsum( target :* Blog(output)) / output.cols
+    -Bsum(target :* Blog(output)) / output.cols
   }
 
   override def derivative(x: BDM[Double], y: BDM[Double]): Unit = {
@@ -384,13 +394,17 @@ private[ann] class FunctionalLayer (val activationFunction: ActivationFunction)
  * Functional layer model. Holds no weights.
  * @param activationFunction activation function
  */
-private[ann] class FunctionalLayerModel private (val activationFunction: ActivationFunction
-  ) extends LayerModel {
+private[ann] class FunctionalLayerModel private (val activationFunction: ActivationFunction)
+  extends LayerModel {
   val size = 0
-
+  // matrices for in-place computations
+  // outputs
   private var f: BDM[Double] = null
+  // delta
   private var d: BDM[Double] = null
+  // matrix for error computation
   private var e: BDM[Double] = null
+  // delta gradient
   private lazy val dg = new Array[Double](0)
 
   override def eval(data: BDM[Double]): BDM[Double] = {
@@ -487,7 +501,7 @@ private[ann] trait TopologyModel extends Serializable{
  * Feed forward ANN
  * @param layers
  */
-class FeedForwardTopology private(val layers: Array[Layer]) extends Topology {
+private[ann] class FeedForwardTopology private(val layers: Array[Layer]) extends Topology {
   override def getInstance(weights: Vector): TopologyModel = FeedForwardModel(this, weights)
 
   override def getInstance(seed: Long): TopologyModel = FeedForwardModel(this, seed)
@@ -496,7 +510,7 @@ class FeedForwardTopology private(val layers: Array[Layer]) extends Topology {
 /**
  * Factory for some of the frequently-used topologies
  */
-object FeedForwardTopology {
+private[ml] object FeedForwardTopology {
   /**
    * Creates a feed forward topology from the array of layers
    * @param layers array of layers
@@ -534,19 +548,23 @@ object FeedForwardTopology {
  * @param layerModels models of layers
  * @param topology topology of the network
  */
-private[spark] class FeedForwardModel private(val layerModels: Array[LayerModel],
-  val topology: FeedForwardTopology) extends TopologyModel {
+private[ml] class FeedForwardModel private(
+    val layerModels: Array[LayerModel],
+    val topology: FeedForwardTopology) extends TopologyModel {
   override def forward(data: BDM[Double]): Array[BDM[Double]] = {
     val outputs = new Array[BDM[Double]](layerModels.length)
     outputs(0) = layerModels(0).eval(data)
-    for(i <- 1 until layerModels.length){
+    for (i <- 1 until layerModels.length) {
       outputs(i) = layerModels(i).eval(outputs(i-1))
     }
     outputs
   }
 
-  override def computeGradient(data: BDM[Double], target: BDM[Double], cumGradient: Vector,
-    realBatchSize: Int): Double = {
+  override def computeGradient(
+      data: BDM[Double],
+      target: BDM[Double],
+      cumGradient: Vector,
+      realBatchSize: Int): Double = {
     val outputs = forward(data)
     val deltas = new Array[BDM[Double]](layerModels.length)
     val L = layerModels.length - 1
@@ -585,12 +603,12 @@ private[spark] class FeedForwardModel private(val layerModels: Array[LayerModel]
   override def weights(): Vector = {
     // TODO: extract roll
     var size = 0
-    for(i <- 0 until layerModels.length) {
+    for (i <- 0 until layerModels.length) {
      size += layerModels(i).size
     }
     val array = new Array[Double](size)
     var offset = 0
-    for(i <- 0 until layerModels.length) {
+    for (i <- 0 until layerModels.length) {
       val layerWeights = layerModels(i).weights().toArray
       System.arraycopy(layerWeights, 0, array, offset, layerWeights.length)
       offset += layerWeights.length
@@ -620,7 +638,7 @@ private[ann] object FeedForwardModel {
     val layers = topology.layers
     val layerModels = new Array[LayerModel](layers.length)
     var offset = 0
-    for(i <- 0 until layers.length){
+    for (i <- 0 until layers.length) {
       layerModels(i) = layers(i).getInstance(weights, offset)
       offset += layerModels(i).size
     }
@@ -658,8 +676,11 @@ private[ann] class ANNGradient(topology: Topology, dataStacker: DataStacker) ext
     (gradient, loss)
   }
 
-  override def compute(data: Vector, label: Double, weights: Vector,
-    cumGradient: Vector): Double = {
+  override def compute(
+      data: Vector,
+      label: Double,
+      weights: Vector,
+      cumGradient: Vector): Double = {
     val (input, target, realBatchSize) = dataStacker.unstack(data)
     val model = topology.getInstance(weights)
     model.computeGradient(input, target, cumGradient, realBatchSize)
@@ -684,12 +705,12 @@ private[ann] class DataStacker(stackSize: Int, inputSize: Int, outputSize: Int)
    */
   def stack(data: RDD[(Vector, Vector)]): RDD[(Double, Vector)] = {
     val stackedData = if (stackSize == 1) {
-      data.map(v =>
+      data.map { v =>
         (0.0,
           Vectors.fromBreeze(BDV.vertcat(
             v._1.toBreeze.toDenseVector,
            v._2.toBreeze.toDenseVector))
-        ))
+          ) }
     } else {
       data.mapPartitions { it =>
         it.grouped(stackSize).map { seq =>
@@ -728,14 +749,15 @@ private[ann] class DataStacker(stackSize: Int, inputSize: Int, outputSize: Int)
  */
 private[ann] class ANNUpdater extends Updater {
 
-  override def compute(weightsOld: Vector,
-    gradient: Vector,
-    stepSize: Double,
-    iter: Int,
-    regParam: Double): (Vector, Double) = {
+  override def compute(
+      weightsOld: Vector,
+      gradient: Vector,
+      stepSize: Double,
+      iter: Int,
+      regParam: Double): (Vector, Double) = {
     val thisIterStepSize = stepSize
     val brzWeights: BV[Double] = weightsOld.toBreeze.toDenseVector
-    brzAxpy(-thisIterStepSize, gradient.toBreeze, brzWeights)
+    Baxpy(-thisIterStepSize, gradient.toBreeze, brzWeights)
     (Vectors.fromBreeze(brzWeights), 0)
   }
 }
@@ -746,8 +768,10 @@ private[ann] class ANNUpdater extends Updater {
  * @param inputSize input size
  * @param outputSize output size
  */
-private[ml] class FeedForwardTrainer (topology: Topology, val inputSize: Int,
-  val outputSize: Int) extends Serializable {
+private[ml] class FeedForwardTrainer(
+    topology: Topology,
+    val inputSize: Int,
+    val outputSize: Int) extends Serializable {
 
   // TODO: what if we need to pass random seed?
   private var _weights = topology.getInstance(11L).weights()
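
Note: after the visibility changes, FeedForwardTopology (now private[ml]) is the entry point for building networks inside ml. A sketch of how the pieces wire together, mirroring the calls MultilayerPerceptronClassifier makes below; the trainer.train call and its RDD[(Vector, Vector)] input are assumptions based on DataStacker's stack/unstack machinery, since that method is not shown in this diff.

import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.rdd.RDD

// 780 inputs, one hidden layer of 100 neurons, 10 outputs; `true` puts softmax on top
val topology = FeedForwardTopology.multiLayerPerceptron(Array(780, 100, 10), true)
val trainer = new FeedForwardTrainer(topology, 780, 10)
trainer.LBFGSOptimizer.setConvergenceTol(1e-4).setNumIterations(100)
// assumed API: trains on (features, one-hot target) pairs and returns a TopologyModel
// val model = trainer.train(data: RDD[(Vector, Vector)])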

mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala

Lines changed: 15 additions & 15 deletions
@@ -29,7 +29,7 @@ import org.apache.spark.sql.DataFrame
 
 /** Params for Multilayer Perceptron. */
 private[ml] trait MultilayerPerceptronParams extends PredictorParams
-with HasSeed with HasMaxIter with HasTol {
+  with HasSeed with HasMaxIter with HasTol {
   /**
    * Layer sizes including input size and output size.
    * @group param
@@ -39,7 +39,7 @@ with HasSeed with HasMaxIter with HasTol {
     " E.g., Array(780, 100, 10) means 780 inputs, " +
     "one hidden layer with 100 neurons and output layer of 10 neurons.",
     // TODO: how to check ALSO that all elements are greater than 0?
-    ParamValidators.lengthGt(1)
+    ParamValidators.arrayLengthGt(1)
   )
 
   /** @group setParam */
@@ -94,12 +94,12 @@ private object LabelConverter {
    * Returns a vector of given length with zeroes at all positions
    * and value 1.0 at the position that corresponds to the label.
    *
-   * @param labeledPoint labeled point
+   * @param labeledPoint  labeled point
    * @param labelCount total number of labels
-   * @return vector encoding of a label
+   * @return pair of features and vector encoding of a label
    */
-  def apply(labeledPoint: LabeledPoint, labelCount: Int): (Vector, Vector) = {
-    val output = Array.fill(labelCount){0.0}
+  def encodeLabeledPoint(labeledPoint: LabeledPoint, labelCount: Int): (Vector, Vector) = {
+    val output = Array.fill(labelCount)(0.0)
     output(labeledPoint.label.toInt) = 1.0
     (labeledPoint.features, Vectors.dense(output))
   }
@@ -108,10 +108,10 @@ private object LabelConverter {
    * Converts a vector to a label.
    * Returns the position of the maximal element of a vector.
    *
-   * @param output label encoded with a vector
-   * @return label
+   * @param output  label encoded with a vector
+   * @return  label
    */
-  def apply(output: Vector): Double = {
+  def decodeLabel(output: Vector): Double = {
     output.argmax.toDouble
   }
 }
@@ -138,14 +138,14 @@ class MultilayerPerceptronClassifier(override val uid: String)
    * Developers can implement this instead of [[fit()]] to avoid dealing with schema validation
    * and copying parameters into the model.
    *
-   * @param dataset Training dataset
-   * @return Fitted model
+   * @param dataset  Training dataset
+   * @return  Fitted model
    */
   override protected def train(dataset: DataFrame): MultilayerPerceptronClassifierModel = {
-    val labels = getLayers.last.toInt
+    val myLayers = $(layers)
+    val labels = myLayers.last
     val lpData = extractLabeledPoints(dataset)
-    val data = lpData.map(lp => LabelConverter(lp, labels))
-    val myLayers = getLayers.map(_.toInt)
+    val data = lpData.map(lp => LabelConverter.encodeLabeledPoint(lp, labels))
     val topology = FeedForwardTopology.multiLayerPerceptron(myLayers, true)
     val FeedForwardTrainer = new FeedForwardTrainer(topology, myLayers(0), myLayers.last)
     FeedForwardTrainer.LBFGSOptimizer.setConvergenceTol(getTol).setNumIterations(getMaxIter)
@@ -179,7 +179,7 @@ class MultilayerPerceptronClassifierModel private[ml](
    * This internal method is used to implement [[transform()]] and output [[predictionCol]].
    */
   override protected def predict(features: Vector): Double = {
-    LabelConverter(mlpModel.predict(features))
+    LabelConverter.decodeLabel(mlpModel.predict(features))
   }
 
   override def copy(extra: ParamMap): MultilayerPerceptronClassifierModel = {
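
Note: splitting the two overloaded apply methods into named encode/decode methods makes the conversion direction explicit at each call site. A round-trip sketch; LabelConverter is private to this file, so this is illustrative only.

import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint

val lp = LabeledPoint(2.0, Vectors.dense(0.5, -0.5))
// one-hot encoding: label 2 of 4 -> (features, [0.0, 0.0, 1.0, 0.0])
val (features, target) = LabelConverter.encodeLabeledPoint(lp, 4)
// decoding returns the argmax position, recovering the original label
assert(LabelConverter.decodeLabel(target) == lp.label)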

mllib/src/main/scala/org/apache/spark/ml/param/params.scala

Lines changed: 9 additions & 11 deletions
@@ -167,18 +167,16 @@ object ParamValidators {
     allowed.contains(value)
   }
 
-  /** Private method for checking array types and converting to Array. */
-  private def getArray[T](value: T): Array[_] = value match {
-    case x: Array[_] => x
-    case _ =>
-      // The type should be checked before this is ever called.
-      throw new IllegalArgumentException("Array Param validation failed because" +
-        s" of unexpected input type: ${value.getClass}")
-  }
-
   /** Check that the array length is greater than lowerBound. */
-  def lengthGt[T](lowerBound: Double): T => Boolean = { (value: T) =>
-    getArray(value).length > lowerBound
+  def arrayLengthGt[T](lowerBound: Double): T => Boolean = { (value: T) =>
+    val array: Array[_] = value match {
+      case x: Array[_] => x
+      case _ =>
+        // The type should be checked before this is ever called.
+        throw new IllegalArgumentException("Array Param validation failed because" +
+          s" of unexpected input type: ${value.getClass}")
+    }
+    array.length > lowerBound
   }
 }
 
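Note: inlining getArray into the renamed arrayLengthGt drops a private helper that had a single caller, and the new name states that the validator only makes sense for array-typed params. A usage sketch against the layers param above:

val check = ParamValidators.arrayLengthGt[Array[Int]](1)
check(Array(780, 100, 10)) // true: 3 > 1, a valid `layers` value
check(Array(780))          // false: one size cannot describe both input and output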