
Commit 4f5ac09

Author: Davies Liu (committed)

Merge branch 'master' of github.com:apache/spark into R5

2 parents: 41f8184 + 25998e4

File tree: 325 files changed, +7187 -2302 lines


R/pkg/R/RDD.R

Lines changed: 5 additions & 13 deletions
@@ -85,7 +85,7 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val)

   if (!inherits(prev, "PipelinedRDD") || !isPipelinable(prev)) {
     # This transformation is the first in its stage:
-    .Object@func <- func
+    .Object@func <- cleanClosure(func)
     .Object@prev_jrdd <- getJRDD(prev)
     .Object@env$prev_serializedMode <- prev@env$serializedMode
     # NOTE: We use prev_serializedMode to track the serialization mode of prev_JRDD
@@ -94,7 +94,7 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val)
     pipelinedFunc <- function(split, iterator) {
       func(split, prev@func(split, iterator))
     }
-    .Object@func <- pipelinedFunc
+    .Object@func <- cleanClosure(pipelinedFunc)
     .Object@prev_jrdd <- prev@prev_jrdd # maintain the pipeline
     # Get the serialization mode of the parent RDD
     .Object@env$prev_serializedMode <- prev@env$prev_serializedMode
@@ -144,17 +144,13 @@ setMethod("getJRDD", signature(rdd = "PipelinedRDD"),
              return(rdd@env$jrdd_val)
            }

-           computeFunc <- function(split, part) {
-             rdd@func(split, part)
-           }
-
            packageNamesArr <- serialize(.sparkREnv[[".packages"]],
                                         connection = NULL)

            broadcastArr <- lapply(ls(.broadcastNames),
                                   function(name) { get(name, .broadcastNames) })

-           serializedFuncArr <- serialize(computeFunc, connection = NULL)
+           serializedFuncArr <- serialize(rdd@func, connection = NULL)

            prev_jrdd <- rdd@prev_jrdd

@@ -279,7 +275,7 @@ setMethod("unpersist",
 #' @examples
 #'\dontrun{
 #' sc <- sparkR.init()
-#' setCheckpointDir(sc, "checkpoints")
+#' setCheckpointDir(sc, "checkpoint")
 #' rdd <- parallelize(sc, 1:10, 2L)
 #' checkpoint(rdd)
 #'}
@@ -551,11 +547,7 @@ setMethod("mapPartitions",
 setMethod("lapplyPartitionsWithIndex",
           signature(X = "RDD", FUN = "function"),
           function(X, FUN) {
-            FUN <- cleanClosure(FUN)
-            closureCapturingFunc <- function(split, part) {
-              FUN(split, part)
-            }
-            PipelinedRDD(X, closureCapturingFunc)
+            PipelinedRDD(X, FUN)
           })

 #' @rdname lapplyPartitionsWithIndex
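Note on the change above: closure cleaning now happens once, when the PipelinedRDD is constructed, so callers such as lapplyPartitionsWithIndex no longer wrap FUN in a capturing helper before serialization. A minimal sketch of the user-facing behavior this protects, assuming a local sparkR.init() session and the RDD API as defined in this branch; the variable names are illustrative:

# A user function that reaches into its enclosing environment (`offset`).
# Cleaning the closure when the PipelinedRDD is built keeps the objects the
# function actually references with it when it is serialized to the workers.
sc <- sparkR.init(master = "local")
offset <- 100
rdd <- parallelize(sc, 1:6, 2L)
shifted <- lapplyPartitionsWithIndex(rdd, function(split, part) {
  lapply(part, function(x) { x + offset })   # `offset` must travel with the closure
})
collect(shifted)   # expected: 101 through 106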

R/pkg/R/context.R

Lines changed: 1 addition & 1 deletion
@@ -216,7 +216,7 @@ broadcast <- function(sc, object) {
 #' @examples
 #'\dontrun{
 #' sc <- sparkR.init()
-#' setCheckpointDir(sc, "~/checkpoints")
+#' setCheckpointDir(sc, "~/checkpoint")
 #' rdd <- parallelize(sc, 1:2, 2L)
 #' checkpoint(rdd)
 #'}

R/pkg/R/pairRDD.R

Lines changed: 0 additions & 4 deletions
@@ -694,10 +694,6 @@ setMethod("cogroup",
   for (i in 1:rddsLen) {
     rdds[[i]] <- lapply(rdds[[i]],
                         function(x) { list(x[[1]], list(i, x[[2]])) })
-    # TODO(hao): As issue [SparkR-142] mentions, the right value of i
-    # will not be captured into UDF if getJRDD is not invoked.
-    # It should be resolved together with that issue.
-    getJRDD(rdds[[i]])  # Capture the closure.
   }
   union.rdd <- Reduce(unionRDD, rdds)
   group.func <- function(vlist) {
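The deleted TODO worked around SparkR-142: without the eager getJRDD() call, the UDF built inside the loop captured the loop variable i lazily, so every group would eventually see its final value. With closures cleaned at PipelinedRDD construction, the explicit capture is no longer needed. A plain-R sketch of the underlying capture problem (no Spark involved; names are illustrative):

# Closures created in a loop share one `i` binding; by the time they run,
# `i` already holds its final value. local() snapshots the current value.
lazy <- list()
eager <- list()
for (i in 1:3) {
  lazy[[i]] <- function() i                       # all three report 3 later
  eager[[i]] <- local({ j <- i; function() j })   # snapshots 1, 2, 3
}
sapply(lazy, function(f) f())    # 3 3 3
sapply(eager, function(f) f())   # 1 2 3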

R/pkg/inst/tests/test_binaryFile.R

Lines changed: 2 additions & 2 deletions
@@ -27,7 +27,7 @@ test_that("saveAsObjectFile()/objectFile() following textFile() works", {
   fileName2 <- tempfile(pattern="spark-test", fileext=".tmp")
   writeLines(mockFile, fileName1)

-  rdd <- textFile(sc, fileName1)
+  rdd <- textFile(sc, fileName1, 1)
   saveAsObjectFile(rdd, fileName2)
   rdd <- objectFile(sc, fileName2)
   expect_equal(collect(rdd), as.list(mockFile))
@@ -40,7 +40,7 @@ test_that("saveAsObjectFile()/objectFile() works on a parallelized list", {
   fileName <- tempfile(pattern="spark-test", fileext=".tmp")

   l <- list(1, 2, 3)
-  rdd <- parallelize(sc, l)
+  rdd <- parallelize(sc, l, 1)
   saveAsObjectFile(rdd, fileName)
   rdd <- objectFile(sc, fileName)
   expect_equal(collect(rdd), l)
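Both tests now pin the RDD to a single partition. A hedged reading: with one partition the save produces a single part file, so the re-read RDD returns elements in the original order and the exact-equality assertions stay deterministic; the same pattern appears in test_textFile.R below. A compact sketch mirroring the test, assuming a local sc from sparkR.init() and testthat loaded:

# One partition in, one part file out: collect() order matches input order.
fileName <- tempfile(pattern = "spark-test", fileext = ".tmp")
rdd <- parallelize(sc, list(1, 2, 3), 1)   # third argument: number of partitions
saveAsObjectFile(rdd, fileName)
expect_equal(collect(objectFile(sc, fileName)), list(1, 2, 3))
unlink(fileName, recursive = TRUE)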

R/pkg/inst/tests/test_rdd.R

Lines changed: 3 additions & 2 deletions
@@ -141,7 +141,8 @@ test_that("PipelinedRDD support actions: cache(), persist(), unpersist(), checkpoint()", {
   unpersist(rdd2)
   expect_false(rdd2@env$isCached)

-  setCheckpointDir(sc, "checkpoints")
+  tempDir <- tempfile(pattern = "checkpoint")
+  setCheckpointDir(sc, tempDir)
   checkpoint(rdd2)
   expect_true(rdd2@env$isCheckpointed)

@@ -152,7 +153,7 @@ test_that("PipelinedRDD support actions: cache(), persist(), unpersist(), checkpoint()", {
   # make sure the data is collectable
   collect(rdd2)

-  unlink("checkpoints")
+  unlink(tempDir)
 })

 test_that("reduce on RDD", {

R/pkg/inst/tests/test_textFile.R

Lines changed: 2 additions & 2 deletions
@@ -81,7 +81,7 @@ test_that("textFile() followed by a saveAsTextFile() returns the same content",
   fileName2 <- tempfile(pattern="spark-test", fileext=".tmp")
   writeLines(mockFile, fileName1)

-  rdd <- textFile(sc, fileName1)
+  rdd <- textFile(sc, fileName1, 1L)
   saveAsTextFile(rdd, fileName2)
   rdd <- textFile(sc, fileName2)
   expect_equal(collect(rdd), as.list(mockFile))
@@ -93,7 +93,7 @@ test_that("textFile() followed by a saveAsTextFile() returns the same content",
 test_that("saveAsTextFile() on a parallelized list works as expected", {
   fileName <- tempfile(pattern="spark-test", fileext=".tmp")
   l <- list(1, 2, 3)
-  rdd <- parallelize(sc, l)
+  rdd <- parallelize(sc, l, 1L)
   saveAsTextFile(rdd, fileName)
   rdd <- textFile(sc, fileName)
   expect_equal(collect(rdd), lapply(l, function(x) {toString(x)}))

bagel/src/test/resources/log4j.properties

Lines changed: 1 addition & 1 deletion
@@ -24,4 +24,4 @@ log4j.appender.file.layout=org.apache.log4j.PatternLayout
 log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n

 # Ignore messages below warning level from Jetty, because it's a bit verbose
-log4j.logger.org.eclipse.jetty=WARN
+log4j.logger.org.spark-project.jetty=WARN

bin/load-spark-env.sh

Lines changed: 3 additions & 2 deletions
@@ -20,6 +20,7 @@
 # This script loads spark-env.sh if it exists, and ensures it is only loaded once.
 # spark-env.sh is loaded from SPARK_CONF_DIR if set, or within the current directory's
 # conf/ subdirectory.
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"

 if [ -z "$SPARK_ENV_LOADED" ]; then
   export SPARK_ENV_LOADED=1
@@ -41,8 +42,8 @@ fi

 if [ -z "$SPARK_SCALA_VERSION" ]; then

-  ASSEMBLY_DIR2="$SPARK_HOME/assembly/target/scala-2.11"
-  ASSEMBLY_DIR1="$SPARK_HOME/assembly/target/scala-2.10"
+  ASSEMBLY_DIR2="$FWDIR/assembly/target/scala-2.11"
+  ASSEMBLY_DIR1="$FWDIR/assembly/target/scala-2.10"

   if [[ -d "$ASSEMBLY_DIR2" && -d "$ASSEMBLY_DIR1" ]]; then
     echo -e "Presence of build for both scala versions(SCALA 2.10 and SCALA 2.11) detected." 1>&2

conf/log4j.properties.template

Lines changed: 2 additions & 2 deletions
@@ -6,7 +6,7 @@ log4j.appender.console.layout=org.apache.log4j.PatternLayout
 log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

 # Settings to quiet third party logs that are too verbose
-log4j.logger.org.eclipse.jetty=WARN
-log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR
+log4j.logger.org.spark-project.jetty=WARN
+log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
 log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
 log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO

core/src/main/resources/org/apache/spark/log4j-defaults.properties

Lines changed: 2 additions & 2 deletions
@@ -6,7 +6,7 @@ log4j.appender.console.layout=org.apache.log4j.PatternLayout
 log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

 # Settings to quiet third party logs that are too verbose
-log4j.logger.org.eclipse.jetty=WARN
-log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR
+log4j.logger.org.spark-project.jetty=WARN
+log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
 log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
 log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
