
Commit cc12f47

Merge remote-tracking branch 'apache/master' into range-part
2 parents: 06ac2ec + a4d6020

File tree: 12 files changed, +163 -24 lines


examples/src/main/python/als.py
Lines changed: 9 additions & 0 deletions

@@ -16,6 +16,9 @@
 #
 
 """
+This is an example implementation of ALS for learning how to use Spark. Please refer to
+ALS in pyspark.mllib.recommendation for more conventional use.
+
 This example requires numpy (http://www.numpy.org/)
 """
 from os.path import realpath
@@ -49,9 +52,15 @@ def update(i, vec, mat, ratings):
 
 
 if __name__ == "__main__":
+
     """
     Usage: als [M] [U] [F] [iterations] [slices]"
     """
+
+    print >> sys.stderr, """WARN: This is a naive implementation of ALS and is given as an
+      example. Please use the ALS method found in pyspark.mllib.recommendation for more
+      conventional use."""
+
     sc = SparkContext(appName="PythonALS")
     M = int(sys.argv[1]) if len(sys.argv) > 1 else 100
     U = int(sys.argv[2]) if len(sys.argv) > 2 else 500

examples/src/main/python/kmeans.py
Lines changed: 6 additions & 0 deletions

@@ -45,9 +45,15 @@ def closestPoint(p, centers):
 
 
 if __name__ == "__main__":
+
     if len(sys.argv) != 4:
         print >> sys.stderr, "Usage: kmeans <file> <k> <convergeDist>"
         exit(-1)
+
+    print >> sys.stderr, """WARN: This is a naive implementation of KMeans Clustering and is given
+      as an example! Please refer to examples/src/main/python/mllib/kmeans.py for an example on
+      how to use MLlib's KMeans implementation."""
+
     sc = SparkContext(appName="PythonKMeans")
     lines = sc.textFile(sys.argv[1])
     data = lines.map(parseVector).cache()

examples/src/main/python/logistic_regression.py
Lines changed: 6 additions & 0 deletions

@@ -47,9 +47,15 @@ def readPointBatch(iterator):
     return [matrix]
 
 if __name__ == "__main__":
+
     if len(sys.argv) != 3:
         print >> sys.stderr, "Usage: logistic_regression <file> <iterations>"
         exit(-1)
+
+    print >> sys.stderr, """WARN: This is a naive implementation of Logistic Regression and is
+      given as an example! Please refer to examples/src/main/python/mllib/logistic_regression.py
+      to see how MLlib's implementation is used."""
+
    sc = SparkContext(appName="PythonLR")
     points = sc.textFile(sys.argv[1]).mapPartitions(readPointBatch).cache()
     iterations = int(sys.argv[2])

examples/src/main/scala/org/apache/spark/examples/LocalALS.scala
Lines changed: 15 additions & 0 deletions

@@ -25,6 +25,9 @@ import cern.jet.math._
 
 /**
  * Alternating least squares matrix factorization.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.recommendation.ALS
  */
 object LocalALS {
   // Parameters set through command line arguments
@@ -107,7 +110,16 @@ object LocalALS {
     solved2D.viewColumn(0)
   }
 
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of ALS and is given as an example!
+        |Please use the ALS method found in org.apache.spark.mllib.recommendation
+        |for more conventional use.
+      """.stripMargin)
+  }
+
   def main(args: Array[String]) {
+
     args match {
       case Array(m, u, f, iters) => {
         M = m.toInt
@@ -120,6 +132,9 @@ object LocalALS {
         System.exit(1)
       }
     }
+
+    showWarning()
+
     printf("Running with M=%d, U=%d, F=%d, iters=%d\n", M, U, F, ITERATIONS)
 
     val R = generateR()
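
For readers following the pointer in these warnings, a minimal sketch of the more conventional usage via org.apache.spark.mllib.recommendation.ALS might look like the following. The object name, toy ratings, and parameter values are illustrative assumptions, not part of this commit:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.recommendation.{ALS, Rating}

// Hypothetical standalone example; not part of this commit.
object MLlibALSExample {
  def main(args: Array[String]) {
    val sc = new SparkContext(new SparkConf().setAppName("MLlibALSExample"))

    // Toy ratings: Rating(user, product, rating)
    val ratings = sc.parallelize(Seq(
      Rating(0, 0, 5.0), Rating(0, 1, 1.0),
      Rating(1, 0, 4.0), Rating(1, 1, 2.0)))

    // Factorize the ratings matrix; rank, iterations, and lambda are illustrative
    val model = ALS.train(ratings, rank = 10, iterations = 10, lambda = 0.01)

    // Predict user 1's rating of product 1 from the learned factors
    println(model.predict(1, 1))
    sc.stop()
  }
}

ALS.train returns a MatrixFactorizationModel, so predictions come straight from the learned factors rather than from hand-rolled normal-equation updates like those in the naive examples.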

examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala
Lines changed: 17 additions & 0 deletions

@@ -21,6 +21,12 @@ import java.util.Random
 
 import breeze.linalg.{Vector, DenseVector}
 
+/**
+ * Logistic regression based classification.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.classification.LogisticRegression
+ */
 object LocalFileLR {
   val D = 10 // Numer of dimensions
   val rand = new Random(42)
@@ -32,7 +38,18 @@ object LocalFileLR {
     DataPoint(new DenseVector(nums.slice(1, D + 1)), nums(0))
   }
 
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
+        |Please use the LogisticRegression method found in org.apache.spark.mllib.classification
+        |for more conventional use.
+      """.stripMargin)
+  }
+
   def main(args: Array[String]) {
+
+    showWarning()
+
     val lines = scala.io.Source.fromFile(args(0)).getLines().toArray
     val points = lines.map(parsePoint _)
     val ITERATIONS = args(1).toInt
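
A matching sketch for the logistic regression examples. The warning text only names the package org.apache.spark.mllib.classification; the concrete entry point used below, LogisticRegressionWithSGD, is an assumption about the MLlib API of this era, and the data and parameters are illustrative:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.classification.LogisticRegressionWithSGD
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint

// Hypothetical standalone example; not part of this commit.
object MLlibLRExample {
  def main(args: Array[String]) {
    val sc = new SparkContext(new SparkConf().setAppName("MLlibLRExample"))

    // Toy training data: LabeledPoint(label, features)
    val points = sc.parallelize(Seq(
      LabeledPoint(1.0, Vectors.dense(1.0, 2.0)),
      LabeledPoint(0.0, Vectors.dense(-1.0, -2.0))))

    // Train with SGD; the iteration count is illustrative
    val model = LogisticRegressionWithSGD.train(points, numIterations = 100)

    // Classify a new point (returns 0.0 or 1.0)
    println(model.predict(Vectors.dense(0.5, 1.0)))
    sc.stop()
  }
}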

examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala
Lines changed: 14 additions & 0 deletions

@@ -28,6 +28,9 @@ import org.apache.spark.SparkContext._
 
 /**
  * K-means clustering.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.clustering.KMeans
  */
 object LocalKMeans {
   val N = 1000
@@ -61,7 +64,18 @@ object LocalKMeans {
     bestIndex
   }
 
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
+        |Please use the KMeans method found in org.apache.spark.mllib.clustering
+        |for more conventional use.
+      """.stripMargin)
+  }
+
   def main(args: Array[String]) {
+
+    showWarning()
+
     val data = generateData
     var points = new HashSet[Vector[Double]]
     var kPoints = new HashMap[Int, Vector[Double]]
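
And a sketch for the clustering examples, assuming the org.apache.spark.mllib.clustering.KMeans API the warning points to (object name, toy points, and parameters are illustrative):

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.mllib.linalg.Vectors

// Hypothetical standalone example; not part of this commit.
object MLlibKMeansExample {
  def main(args: Array[String]) {
    val sc = new SparkContext(new SparkConf().setAppName("MLlibKMeansExample"))

    // Toy 2-D points forming two loose clusters
    val data = sc.parallelize(Seq(
      Vectors.dense(0.0, 0.0), Vectors.dense(1.0, 1.0),
      Vectors.dense(9.0, 8.0), Vectors.dense(8.0, 9.0)))

    // k and maxIterations are illustrative
    val model = KMeans.train(data, k = 2, maxIterations = 10)

    // Assign a new point to the nearest learned centroid
    println(model.predict(Vectors.dense(0.5, 0.5)))
    sc.stop()
  }
}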

examples/src/main/scala/org/apache/spark/examples/LocalLR.scala
Lines changed: 14 additions & 1 deletion

@@ -23,6 +23,9 @@ import breeze.linalg.{Vector, DenseVector}
 
 /**
  * Logistic regression based classification.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.classification.LogisticRegression
  */
 object LocalLR {
   val N = 10000 // Number of data points
@@ -42,9 +45,19 @@ object LocalLR {
     Array.tabulate(N)(generatePoint)
   }
 
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
+        |Please use the LogisticRegression method found in org.apache.spark.mllib.classification
+        |for more conventional use.
+      """.stripMargin)
+  }
+
   def main(args: Array[String]) {
-    val data = generateData
 
+    showWarning()
+
+    val data = generateData
     // Initialize w to a random value
     var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
     println("Initial w: " + w)

examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
Lines changed: 16 additions & 0 deletions

@@ -27,6 +27,9 @@ import org.apache.spark._
 
 /**
  * Alternating least squares matrix factorization.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.recommendation.ALS
  */
 object SparkALS {
   // Parameters set through command line arguments
@@ -87,7 +90,16 @@ object SparkALS {
     solved2D.viewColumn(0)
   }
 
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of ALS and is given as an example!
+        |Please use the ALS method found in org.apache.spark.mllib.recommendation
+        |for more conventional use.
+      """.stripMargin)
+  }
+
   def main(args: Array[String]) {
+
     var slices = 0
 
     val options = (0 to 4).map(i => if (i < args.length) Some(args(i)) else None)
@@ -103,7 +115,11 @@ object SparkALS {
       System.err.println("Usage: SparkALS [M] [U] [F] [iters] [slices]")
       System.exit(1)
     }
+
+    showWarning()
+
     printf("Running with M=%d, U=%d, F=%d, iters=%d\n", M, U, F, ITERATIONS)
+
     val sparkConf = new SparkConf().setAppName("SparkALS")
     val sc = new SparkContext(sparkConf)
 

examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala
Lines changed: 14 additions & 0 deletions

@@ -30,6 +30,9 @@ import org.apache.spark.scheduler.InputFormatInfo
 
 /**
  * Logistic regression based classification.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.classification.LogisticRegression
  */
 object SparkHdfsLR {
   val D = 10 // Numer of dimensions
@@ -48,12 +51,23 @@ object SparkHdfsLR {
     DataPoint(new DenseVector(x), y)
   }
 
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
+        |Please use the LogisticRegression method found in org.apache.spark.mllib.classification
+        |for more conventional use.
+      """.stripMargin)
+  }
+
   def main(args: Array[String]) {
+
     if (args.length < 2) {
       System.err.println("Usage: SparkHdfsLR <file> <iters>")
       System.exit(1)
     }
 
+    showWarning()
+
     val sparkConf = new SparkConf().setAppName("SparkHdfsLR")
     val inputPath = args(0)
     val conf = SparkHadoopUtil.get.newConfiguration()

examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala
Lines changed: 15 additions & 0 deletions

@@ -24,6 +24,9 @@ import org.apache.spark.SparkContext._
 
 /**
  * K-means clustering.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.mllib.clustering.KMeans
  */
 object SparkKMeans {
 
@@ -46,11 +49,23 @@ object SparkKMeans {
     bestIndex
   }
 
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
+        |Please use the KMeans method found in org.apache.spark.mllib.clustering
+        |for more conventional use.
+      """.stripMargin)
+  }
+
   def main(args: Array[String]) {
+
     if (args.length < 3) {
       System.err.println("Usage: SparkKMeans <file> <k> <convergeDist>")
       System.exit(1)
     }
+
+    showWarning()
+
     val sparkConf = new SparkConf().setAppName("SparkKMeans")
     val sc = new SparkContext(sparkConf)
     val lines = sc.textFile(args(0))
