Commit f0a10e1

Author: Davies Liu

address comments from @shivaram
1 parent 3ef7cf3 commit f0a10e1

5 files changed: +15 -11 lines

R/pkg/R/RDD.R

Lines changed: 1 addition & 1 deletion
@@ -238,7 +238,7 @@ setMethod("cache",
 #' @aliases persist,RDD-method
 setMethod("persist",
           signature(x = "RDD", newLevel = "character"),
-          function(x, newLevel) {
+          function(x, newLevel = "MEMORY_ONLY") {
             callJMethod(getJRDD(x), "persist", getStorageLevel(newLevel))
             x@env$isCached <- TRUE
             x
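
The `persist` method shown above takes a storage-level name that `getStorageLevel` resolves for the JVM; the change adds `"MEMORY_ONLY"`, the level `cache` requests, as the documented default. A minimal usage sketch with explicit levels (hypothetical session; `sparkR.init` and `parallelize` are assumed to be reachable, which may require the private RDD API):

{% highlight r %}
library(SparkR)

# Hypothetical local session for illustration only.
sc  <- sparkR.init(master = "local[2]")
rdd <- parallelize(sc, 1:100, 2L)

# Explicit storage level, resolved by getStorageLevel():
rdd <- persist(rdd, "MEMORY_AND_DISK")

# "MEMORY_ONLY" -- the default named in this commit -- is the same level
# that cache() requests:
cachedRdd <- cache(parallelize(sc, 1:100, 2L))
{% endhighlight %}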

R/pkg/R/pairRDD.R

Lines changed: 1 addition & 1 deletion
@@ -327,7 +327,7 @@ setMethod("reduceByKey",
               convertEnvsToList(keys, vals)
             }
             locallyReduced <- lapplyPartition(x, reduceVals)
-            shuffled <- partitionBy(locallyReduced, as.integer(numPartitions))
+            shuffled <- partitionBy(locallyReduced, numToInt(numPartitions))
             lapplyPartition(shuffled, reduceVals)
           })
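
`numToInt` is a small helper in the SparkR package; compared with a bare `as.integer`, the intent appears to be to validate the coercion of a numeric partition count rather than truncate it silently. A rough sketch of that idea (the name `numToIntSketch` is illustrative, not the real implementation):

{% highlight r %}
# Illustrative sketch only -- not the actual SparkR numToInt().
# Coerce a numeric to integer, warning when the value is not integral
# instead of silently truncating the way as.integer(2.7) would.
numToIntSketch <- function(num) {
  if (as.integer(num) != num) {
    warning("coercing a non-integral numeric to integer")
  }
  as.integer(num)
}

numToIntSketch(2)    # 2L
numToIntSketch(2.7)  # 2L, with a warning
{% endhighlight %}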

docs/index.md

Lines changed: 1 addition & 1 deletion
@@ -54,7 +54,7 @@ Example applications are also provided in Python. For example,
 
     ./bin/spark-submit examples/src/main/python/pi.py 10
 
-Spark also provides an experimental R API since 1.4 (only RDD and DtaFrame APIs included).
+Spark also provides an experimental R API since 1.4 (only RDD and DataFrames APIs included).
 To run Spark interactively in a R interpreter, use `bin/sparkR`:
 
     ./bin/sparkR --master local[2]

docs/programming-guide.md

Lines changed: 1 addition & 1 deletion
@@ -867,7 +867,7 @@ There are three recommended ways to do this:
 For example, to pass a longer function, consider the code below:
 
 {% highlight r %}
-"""MyScript.py"""
+"""MyScript.R"""
 myFunc <- function(s) {
   words = strsplit(s, " ")[[1]]
   length(words)
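
The guide's snippet defines a named function so it can be passed to a transformation by name; a short, hypothetical usage of that pattern (the RDD `textFile` and the `map` call below are illustrative additions, not part of the diff):

{% highlight r %}
# Hypothetical usage of the named-function pattern from the guide;
# `textFile` stands in for any RDD of text lines.
myFunc <- function(s) {
  words = strsplit(s, " ")[[1]]
  length(words)
}

wordsPerLine <- map(textFile, myFunc)
{% endhighlight %}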

docs/quick-start.md

Lines changed: 11 additions & 7 deletions
@@ -225,7 +225,11 @@ For example, we'll define a `mymax` function to make this code easier to underst
 One common data flow pattern is MapReduce, as popularized by Hadoop. Spark can implement MapReduce flows easily:
 
 {% highlight r %}
-> wordCounts <- reduceByKey(map(flatMap(textFile, function(line) strsplit(line, " ")[[1]]), function(word) list(word, 1)), "+", 2)
+> wordCounts <- reduceByKey(
+    map(
+      flatMap(textFile, function(line) strsplit(line, " ")[[1]]),
+      function(word) list(word, 1)),
+    "+", 2)
 {% endhighlight %}
 
 Here, we combined the [`flatMap`](programming-guide.html#transformations), [`map`](programming-guide.html#transformations) and [`reduceByKey`](programming-guide.html#transformations) transformations to compute the per-word counts in the file as an RDD of (string, numeric) pairs. To collect the word counts in our shell, we can use the [`collect`](programming-guide.html#actions) action:
@@ -256,10 +260,10 @@ scala> linesWithSpark.cache()
 res7: spark.RDD[String] = spark.FilteredRDD@17e51082
 
 scala> linesWithSpark.count()
-res8: Long = 15
+res8: Long = 19
 
 scala> linesWithSpark.count()
-res9: Long = 15
+res9: Long = 19
 {% endhighlight %}
 
 It may seem silly to use Spark to explore and cache a 100-line text file. The interesting part is
@@ -274,10 +278,10 @@ a cluster, as described in the [programming guide](programming-guide.html#initia
 >>> linesWithSpark.cache()
 
 >>> linesWithSpark.count()
-15
+19
 
 >>> linesWithSpark.count()
-15
+19
 {% endhighlight %}
 
 It may seem silly to use Spark to explore and cache a 100-line text file. The interesting part is
@@ -292,10 +296,10 @@ a cluster, as described in the [programming guide](programming-guide.html#initia
 > cache(linesWithSpark)
 
 > count(linesWithSpark)
-[1] 15
+[1] 19
 
 > count(linesWithSpark)
-[1] 15
+[1] 19
 {% endhighlight %}
 
 It may seem silly to use Spark to explore and cache a 100-line text file. The interesting part is
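
The reformatted `wordCounts` one-liner in the first quick-start hunk chains `flatMap`, `map`, and `reduceByKey`; written step by step, the same pipeline looks like the sketch below (hypothetical shell session, assuming `textFile` is the RDD of README lines used throughout the quick start):

{% highlight r %}
# Hypothetical step-by-step equivalent of the wordCounts one-liner above;
# assumes `textFile` is the RDD of text lines from the quick start.
words <- flatMap(textFile, function(line) strsplit(line, " ")[[1]])
pairs <- map(words, function(word) list(word, 1))
wordCounts <- reduceByKey(pairs, "+", 2)

# Bring the (word, count) pairs back to the driver with the collect action:
output <- collect(wordCounts)
{% endhighlight %}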
