Commit 70f6932

Merge branch 'master' of https://github.com/apache/spark into addbatch

2 parents: ecaeafb + 22ab70e

631 files changed: +59313 −6064 lines

.rat-excludes
Lines changed: 7 additions & 0 deletions

@@ -15,6 +15,7 @@ TAGS
 RELEASE
 control
 docs
+docker.properties.template
 fairscheduler.xml.template
 spark-defaults.conf.template
 log4j.properties
@@ -29,7 +30,13 @@ spark-env.sh.template
 log4j-defaults.properties
 bootstrap-tooltip.js
 jquery-1.11.1.min.js
+d3.min.js
+dagre-d3.min.js
+graphlib-dot.min.js
 sorttable.js
+vis.min.js
+vis.min.css
+vis.map
 .*avsc
 .*txt
 .*json
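(The .rat-excludes file lists paths that Apache RAT, the Release Audit Tool, skips when auditing license headers; the new entries cover the bundled visualization assets, d3, dagre-d3, graphlib-dot, and vis, whose minified third-party sources carry no Apache header.)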

LICENSE
Lines changed: 31 additions & 0 deletions

@@ -643,6 +643,36 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 
+========================================================================
+For d3 (core/src/main/resources/org/apache/spark/ui/static/d3.min.js):
+========================================================================
+
+Copyright (c) 2010-2015, Michael Bostock
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* The name Michael Bostock may not be used to endorse or promote products
+  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT,
+INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 ========================================================================
 For Scala Interpreter classes (all .scala files in repl/src/main/scala
@@ -814,6 +844,7 @@ BSD-style licenses
 The following components are provided under a BSD-style license. See project link for details.
 
 (BSD 3 Clause) core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core)
+(BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.1.15 - https://github.com/jpmml/jpmml-model)
 (BSD 3-clause style license) jblas (org.jblas:jblas:1.2.3 - http://jblas.org/)
 (BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/)
 (BSD License) Javolution (javolution:javolution:5.5.1 - http://javolution.org)
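(These LICENSE additions track the bundles excluded above: the full BSD 3-clause text for d3, now shipped as d3.min.js, plus a one-line BSD entry for the new JPMML-Model dependency.)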

R/pkg/NAMESPACE
Lines changed: 14 additions & 97 deletions

@@ -1,117 +1,36 @@
-#exportPattern("^[[:alpha:]]+")
-exportClasses("RDD")
-exportClasses("Broadcast")
-exportMethods(
-              "aggregateByKey",
-              "aggregateRDD",
-              "cache",
-              "cartesian",
-              "checkpoint",
-              "coalesce",
-              "cogroup",
-              "collect",
-              "collectAsMap",
-              "collectPartition",
-              "combineByKey",
-              "count",
-              "countByKey",
-              "countByValue",
-              "distinct",
-              "Filter",
-              "filterRDD",
-              "first",
-              "flatMap",
-              "flatMapValues",
-              "fold",
-              "foldByKey",
-              "foreach",
-              "foreachPartition",
-              "fullOuterJoin",
-              "glom",
-              "groupByKey",
-              "intersection",
-              "join",
-              "keyBy",
-              "keys",
-              "length",
-              "lapply",
-              "lapplyPartition",
-              "lapplyPartitionsWithIndex",
-              "leftOuterJoin",
-              "lookup",
-              "map",
-              "mapPartitions",
-              "mapPartitionsWithIndex",
-              "mapValues",
-              "maximum",
-              "minimum",
-              "numPartitions",
-              "partitionBy",
-              "persist",
-              "pipeRDD",
-              "reduce",
-              "reduceByKey",
-              "reduceByKeyLocally",
-              "repartition",
-              "rightOuterJoin",
-              "sampleByKey",
-              "sampleRDD",
-              "saveAsTextFile",
-              "saveAsObjectFile",
-              "sortBy",
-              "sortByKey",
-              "subtract",
-              "subtractByKey",
-              "sumRDD",
-              "take",
-              "takeOrdered",
-              "takeSample",
-              "top",
-              "unionRDD",
-              "unpersist",
-              "value",
-              "values",
-              "zipPartitions",
-              "zipRDD",
-              "zipWithIndex",
-              "zipWithUniqueId"
-              )
+# Imports from base R
+importFrom(methods, setGeneric, setMethod, setOldClass)
+useDynLib(SparkR, stringHashCode)
 
 # S3 methods exported
-export(
-       "textFile",
-       "objectFile",
-       "parallelize",
-       "hashCode",
-       "includePackage",
-       "broadcast",
-       "setBroadcastValue",
-       "setCheckpointDir"
-      )
 export("sparkR.init")
 export("sparkR.stop")
 export("print.jobj")
-useDynLib(SparkR, stringHashCode)
-importFrom(methods, setGeneric, setMethod, setOldClass)
-
-# SparkRSQL
 
 exportClasses("DataFrame")
 
-exportMethods("columns",
+exportMethods("cache",
+              "collect",
+              "columns",
+              "count",
+              "describe",
               "distinct",
               "dtypes",
               "except",
               "explain",
               "filter",
+              "first",
               "groupBy",
               "head",
               "insertInto",
               "intersect",
               "isLocal",
+              "join",
+              "length",
               "limit",
               "orderBy",
               "names",
+              "persist",
               "printSchema",
               "registerTempTable",
               "repartition",
@@ -125,9 +44,9 @@ exportMethods("columns",
               "show",
               "showDF",
               "sortDF",
-              "toJSON",
-              "toRDD",
+              "take",
               "unionAll",
+              "unpersist",
               "where",
               "withColumn",
               "withColumnRenamed")
@@ -174,14 +93,12 @@ export("cacheTable",
        "createExternalTable",
        "dropTempTable",
        "jsonFile",
-       "jsonRDD",
        "loadDF",
        "parquetFile",
        "sql",
        "table",
        "tableNames",
        "tables",
-       "toDF",
        "uncacheTable")
 
 export("sparkRSQL.init",
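Note: the NAMESPACE rewrite exports only the DataFrame-level API; the RDD methods and helpers such as textFile, parallelize, and broadcast become package-internal. A minimal sketch of what that means for callers, assuming a SparkR build from this commit ("data.txt" is a hypothetical path):

    # The exported DataFrame surface is unchanged for users:
    df <- jsonFile(sqlCtx, "path/to/file.json")

    # The RDD layer is still reachable via :::, but it is now internal
    # and no longer part of the supported public API:
    rdd <- SparkR:::textFile(sc, "data.txt")
    SparkR:::count(rdd)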

R/pkg/R/DataFrame.R
Lines changed: 39 additions & 2 deletions

@@ -167,7 +167,7 @@ setMethod("isLocal",
 setMethod("showDF",
           signature(x = "DataFrame"),
           function(x, numRows = 20) {
-            cat(callJMethod(x@sdf, "showString", numToInt(numRows)), "\n")
+            callJMethod(x@sdf, "showString", numToInt(numRows))
           })
 
 #' show
@@ -272,7 +272,7 @@ setMethod("names",
 setMethod("registerTempTable",
           signature(x = "DataFrame", tableName = "character"),
           function(x, tableName) {
-            callJMethod(x@sdf, "registerTempTable", tableName)
+            invisible(callJMethod(x@sdf, "registerTempTable", tableName))
           })
 
 #' insertInto
@@ -1276,3 +1276,40 @@ setMethod("saveAsTable",
             callJMethod(df@sdf, "saveAsTable", tableName, source, jmode, options)
           })
 
+#' describe
+#'
+#' Computes statistics for numeric columns.
+#' If no columns are given, this function computes statistics for all numerical columns.
+#'
+#' @param x A DataFrame to be computed.
+#' @param col A string of name
+#' @param ... Additional expressions
+#' @return A DataFrame
+#' @rdname describe
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlCtx <- sparkRSQL.init(sc)
+#' path <- "path/to/file.json"
+#' df <- jsonFile(sqlCtx, path)
+#' describe(df)
+#' describe(df, "col1")
+#' describe(df, "col1", "col2")
+#' }
+setMethod("describe",
+          signature(x = "DataFrame", col = "character"),
+          function(x, col, ...) {
+            colList <- list(col, ...)
+            sdf <- callJMethod(x@sdf, "describe", listToSeq(colList))
+            dataFrame(sdf)
+          })
+
+#' @rdname describe
+setMethod("describe",
+          signature(x = "DataFrame"),
+          function(x) {
+            colList <- as.list(c(columns(x)))
+            sdf <- callJMethod(x@sdf, "describe", listToSeq(colList))
+            dataFrame(sdf)
+          })
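Note: besides returning the JVM showString result directly from showDF and making registerTempTable return invisibly, this hunk adds the new describe() methods. A quick usage sketch, following the roxygen example above (the JSON path is a placeholder):

    sc <- sparkR.init()
    sqlCtx <- sparkRSQL.init(sc)
    df <- jsonFile(sqlCtx, "path/to/file.json")
    describe(df)                  # summary statistics for every numeric column
    describe(df, "col1", "col2")  # or only for the named columns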

R/pkg/R/RDD.R
Lines changed: 5 additions & 5 deletions

@@ -797,7 +797,7 @@ setMethod("first",
 #' @aliases distinct,RDD-method
 setMethod("distinct",
           signature(x = "RDD"),
-          function(x, numPartitions = SparkR::numPartitions(x)) {
+          function(x, numPartitions = SparkR:::numPartitions(x)) {
             identical.mapped <- lapply(x, function(x) { list(x, NULL) })
             reduced <- reduceByKey(identical.mapped,
                                    function(x, y) { x },
@@ -993,7 +993,7 @@ setMethod("coalesce",
           signature(x = "RDD", numPartitions = "numeric"),
           function(x, numPartitions, shuffle = FALSE) {
             numPartitions <- numToInt(numPartitions)
-            if (shuffle || numPartitions > SparkR::numPartitions(x)) {
+            if (shuffle || numPartitions > SparkR:::numPartitions(x)) {
               func <- function(partIndex, part) {
                 set.seed(partIndex)  # partIndex as seed
                 start <- as.integer(sample(numPartitions, 1) - 1)
@@ -1078,7 +1078,7 @@ setMethod("saveAsTextFile",
 #' @aliases sortBy,RDD,RDD-method
 setMethod("sortBy",
           signature(x = "RDD", func = "function"),
-          function(x, func, ascending = TRUE, numPartitions = SparkR::numPartitions(x)) {
+          function(x, func, ascending = TRUE, numPartitions = SparkR:::numPartitions(x)) {
             values(sortByKey(keyBy(x, func), ascending, numPartitions))
           })
 
@@ -1552,7 +1552,7 @@ setMethod("cartesian",
 #' @aliases subtract,RDD
 setMethod("subtract",
           signature(x = "RDD", other = "RDD"),
-          function(x, other, numPartitions = SparkR::numPartitions(x)) {
+          function(x, other, numPartitions = SparkR:::numPartitions(x)) {
             mapFunction <- function(e) { list(e, NA) }
             rdd1 <- map(x, mapFunction)
             rdd2 <- map(other, mapFunction)
@@ -1583,7 +1583,7 @@ setMethod("subtract",
 #' @aliases intersection,RDD
 setMethod("intersection",
           signature(x = "RDD", other = "RDD"),
-          function(x, other, numPartitions = SparkR::numPartitions(x)) {
+          function(x, other, numPartitions = SparkR:::numPartitions(x)) {
             rdd1 <- map(x, function(v) { list(v, NA) })
             rdd2 <- map(other, function(v) { list(v, NA) })
 
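Note: these five hunks are the same fix. Since numPartitions is no longer exported (see the NAMESPACE change above), SparkR::numPartitions would fail to resolve when the default argument is evaluated; :: sees exported names only, while ::: also reaches internal ones. A tiny check, assuming SparkR is installed:

    "numPartitions" %in% getNamespaceExports("SparkR")      # FALSE after this commit
    exists("numPartitions", envir = asNamespace("SparkR"))  # TRUE: still defined internally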
R/pkg/R/generics.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,10 @@ setGeneric("value", function(bcast) { standardGeneric("value") })
384384
#' @export
385385
setGeneric("columns", function(x) {standardGeneric("columns") })
386386

387+
#' @rdname describe
388+
#' @export
389+
setGeneric("describe", function(x, col, ...) { standardGeneric("describe") })
390+
387391
#' @rdname schema
388392
#' @export
389393
setGeneric("dtypes", function(x) { standardGeneric("dtypes") })
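Note: generics.R only declares the S4 generic; the two setMethod() calls in DataFrame.R attach the implementations, and the shared @rdname folds them onto one help page. The pattern in miniature, with toy class and generic names that are not from the source:

    setClass("Wrapper", representation(v = "numeric"))
    setGeneric("summarize", function(x, ...) { standardGeneric("summarize") })  # hypothetical generic
    setMethod("summarize", signature(x = "Wrapper"),
              function(x, ...) { summary(x@v) })
    summarize(new("Wrapper", v = c(1, 2, 3)))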

R/pkg/R/pairRDD.R
Lines changed: 2 additions & 2 deletions

@@ -739,7 +739,7 @@ setMethod("cogroup",
 #' @aliases sortByKey,RDD,RDD-method
 setMethod("sortByKey",
           signature(x = "RDD"),
-          function(x, ascending = TRUE, numPartitions = SparkR::numPartitions(x)) {
+          function(x, ascending = TRUE, numPartitions = SparkR:::numPartitions(x)) {
             rangeBounds <- list()
 
             if (numPartitions > 1) {
@@ -806,7 +806,7 @@ setMethod("sortByKey",
 #' @aliases subtractByKey,RDD
 setMethod("subtractByKey",
           signature(x = "RDD", other = "RDD"),
-          function(x, other, numPartitions = SparkR::numPartitions(x)) {
+          function(x, other, numPartitions = SparkR:::numPartitions(x)) {
             filterFunction <- function(elem) {
               iters <- elem[[2]]
               (length(iters[[1]]) > 0) && (length(iters[[2]]) == 0)
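(Same ::: fix as in RDD.R above, applied to the two pair-RDD methods whose numPartitions defaults reference the now-internal helper.)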

R/pkg/inst/profile/shell.R
Lines changed: 6 additions & 4 deletions

@@ -20,11 +20,13 @@
 .libPaths(c(file.path(home, "R", "lib"), .libPaths()))
 Sys.setenv(NOAWT=1)
 
-library(utils)
-library(SparkR)
-sc <- sparkR.init(Sys.getenv("MASTER", unset = ""))
+# Make sure SparkR package is the last loaded one
+old <- getOption("defaultPackages")
+options(defaultPackages = c(old, "SparkR"))
+
+sc <- SparkR::sparkR.init(Sys.getenv("MASTER", unset = ""))
 assign("sc", sc, envir=.GlobalEnv)
-sqlCtx <- sparkRSQL.init(sc)
+sqlCtx <- SparkR::sparkRSQL.init(sc)
 assign("sqlCtx", sqlCtx, envir=.GlobalEnv)
 cat("\n Welcome to SparkR!")
 cat("\n Spark context is available as sc, SQL context is available as sqlCtx\n")
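Note: this profile runs before R attaches its default packages, so SparkR is not yet on the search path, hence the explicit SparkR:: prefixes. Appending "SparkR" to getOption("defaultPackages") makes R attach it last, so SparkR's generics (filter, for example) mask the base versions rather than the reverse. A quick post-startup check:

    search()  # "package:SparkR" should now sit ahead of "package:stats" and friends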

R/pkg/inst/tests/test_broadcast.R
Lines changed: 1 addition & 1 deletion

@@ -29,7 +29,7 @@ test_that("using broadcast variable", {
   randomMatBr <- broadcast(sc, randomMat)
 
   useBroadcast <- function(x) {
-    sum(value(randomMatBr) * x)
+    sum(SparkR:::value(randomMatBr) * x)
   }
   actual <- collect(lapply(rrdd, useBroadcast))
   expected <- list(sum(randomMat) * 1, sum(randomMat) * 2)
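(value() left the exported API in the NAMESPACE rewrite, so the test now reads the broadcast variable through SparkR:::value.)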
