Skip to content

Commit 7d54b4f

Browse files
committed
Merge pull request alteryx#4 from mengxr/dtree
another pass on code style
2 parents e1dd86f + f536ae9 commit 7d54b4f

File tree

11 files changed

+233
-249
lines changed

11 files changed

+233
-249
lines changed

mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,13 @@ import org.apache.spark.mllib.tree.configuration.QuantileStrategy._
3434
* 1, 2, ... , k-1. It's important to note that features are
3535
* zero-indexed.
3636
*/
37-
class Strategy (
38-
val algo: Algo,
39-
val impurity: Impurity,
40-
val maxDepth: Int,
41-
val maxBins: Int = 100,
42-
val quantileCalculationStrategy: QuantileStrategy = Sort,
43-
val categoricalFeaturesInfo: Map[Int,Int] = Map[Int,Int]()) extends Serializable {
37+
class Strategy (
38+
val algo: Algo,
39+
val impurity: Impurity,
40+
val maxDepth: Int,
41+
val maxBins: Int = 100,
42+
val quantileCalculationStrategy: QuantileStrategy = Sort,
43+
val categoricalFeaturesInfo: Map[Int,Int] = Map[Int,Int]()) extends Serializable {
4444

4545
var numBins: Int = Int.MinValue
46-
4746
}

mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717

1818
package org.apache.spark.mllib.tree.impurity
1919

20-
import java.lang.UnsupportedOperationException
21-
2220
/**
2321
* Class for calculating [[http://en.wikipedia.org/wiki/Binary_entropy_function entropy]] during
2422
* binary classification.

mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,32 +17,30 @@
1717

1818
package org.apache.spark.mllib.tree.impurity
1919

20-
import java.lang.UnsupportedOperationException
21-
2220
/**
23-
* Class for calculating the [[http://en.wikipedia
24-
* .org/wiki/Decision_tree_learning#Gini_impurity]] during binary classification
21+
* Class for calculating the
22+
* [[http://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity Gini impurity]]
23+
* during binary classification.
2524
*/
2625
object Gini extends Impurity {
2726

2827
/**
29-
* gini coefficient calculation
28+
* Gini coefficient calculation
3029
* @param c0 count of instances with label 0
3130
* @param c1 count of instances with label 1
32-
* @return gini coefficient value
31+
* @return Gini coefficient value
3332
*/
34-
def calculate(c0 : Double, c1 : Double): Double = {
33+
override def calculate(c0: Double, c1: Double): Double = {
3534
if (c0 == 0 || c1 == 0) {
3635
0
3736
} else {
3837
val total = c0 + c1
3938
val f0 = c0 / total
4039
val f1 = c1 / total
41-
1 - f0*f0 - f1*f1
40+
1 - f0 * f0 - f1 * f1
4241
}
4342
}
4443

4544
def calculate(count: Double, sum: Double, sumSquares: Double): Double =
4645
throw new UnsupportedOperationException("Gini.calculate")
47-
4846
}

mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
package org.apache.spark.mllib.tree.impurity
1919

2020
/**
21-
* Trail for calculating information gain
21+
* Trait for calculating information gain.
2222
*/
2323
trait Impurity extends Serializable {
2424

mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,25 +17,21 @@
1717

1818
package org.apache.spark.mllib.tree.impurity
1919

20-
import java.lang.UnsupportedOperationException
21-
2220
/**
2321
* Class for calculating variance during regression.
2422
*/
2523
object Variance extends Impurity {
26-
def calculate(c0: Double, c1: Double): Double
27-
= throw new UnsupportedOperationException("Variance.calculate")
24+
override def calculate(c0: Double, c1: Double): Double =
25+
throw new UnsupportedOperationException("Variance.calculate")
2826

2927
/**
3028
* Variance calculation
3129
* @param count number of instances
3230
* @param sum sum of labels
3331
* @param sumSquares summation of squares of the labels
34-
* @return
3532
*/
36-
def calculate(count: Double, sum: Double, sumSquares: Double): Double = {
37-
val squaredLoss = sumSquares - (sum*sum)/count
38-
squaredLoss/count
33+
override def calculate(count: Double, sum: Double, sumSquares: Double): Double = {
34+
val squaredLoss = sumSquares - (sum * sum) / count
35+
squaredLoss / count
3936
}
40-
4137
}

mllib/src/main/scala/org/apache/spark/mllib/tree/model/Bin.scala

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,4 @@ import org.apache.spark.mllib.tree.configuration.FeatureType._
3030
* @param featureType type of feature -- categorical or continuous
3131
* @param category categorical label value accepted in the bin
3232
*/
33-
case class Bin(lowSplit: Split, highSplit: Split, featureType: FeatureType, category: Double) {
34-
35-
}
33+
case class Bin(lowSplit: Split, highSplit: Split, featureType: FeatureType, category: Double)

mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,4 @@ class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable
4646
def predict(features: RDD[Array[Double]]): RDD[Double] = {
4747
features.map(x => predict(x))
4848
}
49-
50-
5149
}

mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,4 @@ class InformationGainStats(
3636
"gain = %f, impurity = %f, left impurity = %f, right impurity = %f, predict = %f"
3737
.format(gain, impurity, leftImpurity, rightImpurity, predict)
3838
}
39-
40-
4139
}

mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class Node (
3737
val split: Option[Split],
3838
var leftNode: Option[Node],
3939
var rightNode: Option[Node],
40-
val stats: Option[InformationGainStats]) extends Serializable with Logging{
40+
val stats: Option[InformationGainStats]) extends Serializable with Logging {
4141

4242
override def toString = "id = " + id + ", isLeaf = " + isLeaf + ", predict = " + predict + ", " +
4343
"split = " + split + ", stats = " + stats
@@ -46,7 +46,7 @@ class Node (
4646
* Build the left and right nodes if this node is not a leaf.
4747
* @param nodes array of nodes
4848
*/
49-
def build(nodes : Array[Node]): Unit = {
49+
def build(nodes: Array[Node]): Unit = {
5050

5151
logDebug("building node " + id + " at level " +
5252
(scala.math.log(id + 1)/scala.math.log(2)).toInt )
@@ -68,7 +68,7 @@ class Node (
6868
* @param feature feature value
6969
* @return predicted value
7070
*/
71-
def predictIfLeaf(feature : Array[Double]) : Double = {
71+
def predictIfLeaf(feature: Array[Double]) : Double = {
7272
if (isLeaf) {
7373
predict
7474
} else{
@@ -87,5 +87,4 @@ class Node (
8787
}
8888
}
8989
}
90-
9190
}

mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,15 @@ case class Split(
4242
* @param feature feature index
4343
* @param featureType type of feature -- categorical or continuous
4444
*/
45-
class DummyLowSplit(feature: Int, featureType : FeatureType)
45+
class DummyLowSplit(feature: Int, featureType: FeatureType)
4646
extends Split(feature, Double.MinValue, featureType, List())
4747

4848
/**
4949
* Split with maximum threshold for continuous features. Helps with the highest bin creation.
5050
* @param feature feature index
5151
* @param featureType type of feature -- categorical or continuous
5252
*/
53-
class DummyHighSplit(feature: Int, featureType : FeatureType)
53+
class DummyHighSplit(feature: Int, featureType: FeatureType)
5454
extends Split(feature, Double.MaxValue, featureType, List())
5555

5656
/**
@@ -59,6 +59,6 @@ class DummyHighSplit(feature: Int, featureType : FeatureType)
5959
* @param feature feature index
6060
* @param featureType type of feature -- categorical or continuous
6161
*/
62-
class DummyCategoricalSplit(feature: Int, featureType : FeatureType)
62+
class DummyCategoricalSplit(feature: Int, featureType: FeatureType)
6363
extends Split(feature, Double.MaxValue, featureType, List())
6464

0 commit comments

Comments
 (0)