Skip to content

Commit 106fd8e

Browse files
committed
Merge pull request #2 from apache/master
pull latest from apache spark
2 parents e3677c9 + 90527f5 commit 106fd8e

File tree

352 files changed

+47129
-19957
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

352 files changed

+47129
-19957
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ scalastyle.txt
6565
scalastyle-output.xml
6666
R-unit-tests.log
6767
R/unit-tests.out
68+
python/lib/pyspark.zip
6869

6970
# For Hive
7071
metastore_db/

.rat-excludes

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ graphlib-dot.min.js
3636
sorttable.js
3737
vis.min.js
3838
vis.min.css
39-
vis.map
4039
.*avsc
4140
.*txt
4241
.*json
@@ -80,6 +79,6 @@ local-1422981780767/*
8079
local-1425081759269/*
8180
local-1426533911241/*
8281
local-1426633911242/*
83-
local-1427397477963/*
82+
local-1430917381534/*
8483
DESCRIPTION
8584
NAMESPACE

R/pkg/DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@ Suggests:
1515
Description: R frontend for Spark
1616
License: Apache License (== 2.0)
1717
Collate:
18+
'schema.R'
1819
'generics.R'
1920
'jobj.R'
2021
'RDD.R'
2122
'pairRDD.R'
22-
'schema.R'
2323
'column.R'
2424
'group.R'
2525
'DataFrame.R'

R/pkg/NAMESPACE

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ exportMethods("cache",
1313
"collect",
1414
"columns",
1515
"count",
16+
"describe",
1617
"distinct",
1718
"dtypes",
1819
"except",
@@ -25,7 +26,6 @@ exportMethods("cache",
2526
"intersect",
2627
"isLocal",
2728
"join",
28-
"length",
2929
"limit",
3030
"orderBy",
3131
"names",
@@ -44,8 +44,6 @@ exportMethods("cache",
4444
"showDF",
4545
"sortDF",
4646
"take",
47-
"toJSON",
48-
"toRDD",
4947
"unionAll",
5048
"unpersist",
5149
"where",
@@ -94,19 +92,14 @@ export("cacheTable",
9492
"createExternalTable",
9593
"dropTempTable",
9694
"jsonFile",
97-
"jsonRDD",
9895
"loadDF",
9996
"parquetFile",
10097
"sql",
10198
"table",
10299
"tableNames",
103100
"tables",
104-
"toDF",
105101
"uncacheTable")
106102

107-
export("sparkRSQL.init",
108-
"sparkRHive.init")
109-
110103
export("structField",
111104
"structField.jobj",
112105
"structField.character",

R/pkg/R/DataFrame.R

Lines changed: 88 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) {
4545

4646
#' @rdname DataFrame
4747
#' @export
48+
#'
49+
#' @param sdf A Java object reference to the backing Scala DataFrame
50+
#' @param isCached TRUE if the dataFrame is cached
4851
dataFrame <- function(sdf, isCached = FALSE) {
4952
new("DataFrame", sdf, isCached)
5053
}
@@ -244,7 +247,7 @@ setMethod("columns",
244247
})
245248

246249
#' @rdname columns
247-
#' @export
250+
#' @aliases names,DataFrame,function-method
248251
setMethod("names",
249252
signature(x = "DataFrame"),
250253
function(x) {
@@ -272,7 +275,7 @@ setMethod("names",
272275
setMethod("registerTempTable",
273276
signature(x = "DataFrame", tableName = "character"),
274277
function(x, tableName) {
275-
callJMethod(x@sdf, "registerTempTable", tableName)
278+
invisible(callJMethod(x@sdf, "registerTempTable", tableName))
276279
})
277280

278281
#' insertInto
@@ -399,23 +402,23 @@ setMethod("repartition",
399402
dataFrame(sdf)
400403
})
401404

402-
#' toJSON
403-
#'
404-
#' Convert the rows of a DataFrame into JSON objects and return an RDD where
405-
#' each element contains a JSON string.
406-
#'
407-
#' @param x A SparkSQL DataFrame
408-
#' @return A StringRRDD of JSON objects
409-
#' @rdname tojson
410-
#' @export
411-
#' @examples
412-
#'\dontrun{
413-
#' sc <- sparkR.init()
414-
#' sqlCtx <- sparkRSQL.init(sc)
415-
#' path <- "path/to/file.json"
416-
#' df <- jsonFile(sqlCtx, path)
417-
#' newRDD <- toJSON(df)
418-
#'}
405+
# toJSON
406+
#
407+
# Convert the rows of a DataFrame into JSON objects and return an RDD where
408+
# each element contains a JSON string.
409+
#
410+
# @param x A SparkSQL DataFrame
411+
# @return A StringRRDD of JSON objects
412+
# @rdname tojson
413+
# @export
414+
# @examples
415+
#\dontrun{
416+
# sc <- sparkR.init()
417+
# sqlCtx <- sparkRSQL.init(sc)
418+
# path <- "path/to/file.json"
419+
# df <- jsonFile(sqlCtx, path)
420+
# newRDD <- toJSON(df)
421+
#}
419422
setMethod("toJSON",
420423
signature(x = "DataFrame"),
421424
function(x) {
@@ -578,8 +581,8 @@ setMethod("limit",
578581
dataFrame(res)
579582
})
580583

581-
# Take the first NUM rows of a DataFrame and return a the results as a data.frame
582-
584+
#' Take the first NUM rows of a DataFrame and return the results as a data.frame
585+
#'
583586
#' @rdname take
584587
#' @export
585588
#' @examples
@@ -644,22 +647,22 @@ setMethod("first",
644647
take(x, 1)
645648
})
646649

647-
#' toRDD()
648-
#'
649-
#' Converts a Spark DataFrame to an RDD while preserving column names.
650-
#'
651-
#' @param x A Spark DataFrame
652-
#'
653-
#' @rdname DataFrame
654-
#' @export
655-
#' @examples
656-
#'\dontrun{
657-
#' sc <- sparkR.init()
658-
#' sqlCtx <- sparkRSQL.init(sc)
659-
#' path <- "path/to/file.json"
660-
#' df <- jsonFile(sqlCtx, path)
661-
#' rdd <- toRDD(df)
662-
#' }
650+
# toRDD()
651+
#
652+
# Converts a Spark DataFrame to an RDD while preserving column names.
653+
#
654+
# @param x A Spark DataFrame
655+
#
656+
# @rdname DataFrame
657+
# @export
658+
# @examples
659+
#\dontrun{
660+
# sc <- sparkR.init()
661+
# sqlCtx <- sparkRSQL.init(sc)
662+
# path <- "path/to/file.json"
663+
# df <- jsonFile(sqlCtx, path)
664+
# rdd <- toRDD(df)
665+
# }
663666
setMethod("toRDD",
664667
signature(x = "DataFrame"),
665668
function(x) {
@@ -706,6 +709,7 @@ setMethod("groupBy",
706709
#'
707710
#' Compute aggregates by specifying a list of columns
708711
#'
712+
#' @param x a DataFrame
709713
#' @rdname DataFrame
710714
#' @export
711715
setMethod("agg",
@@ -721,53 +725,53 @@ setMethod("agg",
721725
# the requested map function. #
722726
###################################################################################
723727

724-
#' @rdname lapply
728+
# @rdname lapply
725729
setMethod("lapply",
726730
signature(X = "DataFrame", FUN = "function"),
727731
function(X, FUN) {
728732
rdd <- toRDD(X)
729733
lapply(rdd, FUN)
730734
})
731735

732-
#' @rdname lapply
736+
# @rdname lapply
733737
setMethod("map",
734738
signature(X = "DataFrame", FUN = "function"),
735739
function(X, FUN) {
736740
lapply(X, FUN)
737741
})
738742

739-
#' @rdname flatMap
743+
# @rdname flatMap
740744
setMethod("flatMap",
741745
signature(X = "DataFrame", FUN = "function"),
742746
function(X, FUN) {
743747
rdd <- toRDD(X)
744748
flatMap(rdd, FUN)
745749
})
746750

747-
#' @rdname lapplyPartition
751+
# @rdname lapplyPartition
748752
setMethod("lapplyPartition",
749753
signature(X = "DataFrame", FUN = "function"),
750754
function(X, FUN) {
751755
rdd <- toRDD(X)
752756
lapplyPartition(rdd, FUN)
753757
})
754758

755-
#' @rdname lapplyPartition
759+
# @rdname lapplyPartition
756760
setMethod("mapPartitions",
757761
signature(X = "DataFrame", FUN = "function"),
758762
function(X, FUN) {
759763
lapplyPartition(X, FUN)
760764
})
761765

762-
#' @rdname foreach
766+
# @rdname foreach
763767
setMethod("foreach",
764768
signature(x = "DataFrame", func = "function"),
765769
function(x, func) {
766770
rdd <- toRDD(x)
767771
foreach(rdd, func)
768772
})
769773

770-
#' @rdname foreach
774+
# @rdname foreach
771775
setMethod("foreachPartition",
772776
signature(x = "DataFrame", func = "function"),
773777
function(x, func) {
@@ -788,6 +792,7 @@ setMethod("$", signature(x = "DataFrame"),
788792
getColumn(x, name)
789793
})
790794

795+
#' @rdname select
791796
setMethod("$<-", signature(x = "DataFrame"),
792797
function(x, name, value) {
793798
stopifnot(class(value) == "Column" || is.null(value))
@@ -1009,7 +1014,7 @@ setMethod("sortDF",
10091014
})
10101015

10111016
#' @rdname sortDF
1012-
#' @export
1017+
#' @aliases orderBy,DataFrame,function-method
10131018
setMethod("orderBy",
10141019
signature(x = "DataFrame", col = "characterOrColumn"),
10151020
function(x, col) {
@@ -1046,7 +1051,7 @@ setMethod("filter",
10461051
})
10471052

10481053
#' @rdname filter
1049-
#' @export
1054+
#' @aliases where,DataFrame,function-method
10501055
setMethod("where",
10511056
signature(x = "DataFrame", condition = "characterOrColumn"),
10521057
function(x, condition) {
@@ -1276,3 +1281,40 @@ setMethod("saveAsTable",
12761281
callJMethod(df@sdf, "saveAsTable", tableName, source, jmode, options)
12771282
})
12781283

1284+
#' describe
1285+
#'
1286+
#' Computes statistics for numeric columns.
1287+
#' If no columns are given, this function computes statistics for all numerical columns.
1288+
#'
1289+
#' @param x A DataFrame to compute statistics for.
1290+
#' @param col A string giving the name of a column
1291+
#' @param ... Names of additional columns
1292+
#' @return A DataFrame
1293+
#' @rdname describe
1294+
#' @export
1295+
#' @examples
1296+
#'\dontrun{
1297+
#' sc <- sparkR.init()
1298+
#' sqlCtx <- sparkRSQL.init(sc)
1299+
#' path <- "path/to/file.json"
1300+
#' df <- jsonFile(sqlCtx, path)
1301+
#' describe(df)
1302+
#' describe(df, "col1")
1303+
#' describe(df, "col1", "col2")
1304+
#' }
1305+
setMethod("describe",
1306+
signature(x = "DataFrame", col = "character"),
1307+
function(x, col, ...) {
1308+
colList <- list(col, ...)
1309+
sdf <- callJMethod(x@sdf, "describe", listToSeq(colList))
1310+
dataFrame(sdf)
1311+
})
1312+
1313+
#' @rdname describe
1314+
setMethod("describe",
1315+
signature(x = "DataFrame"),
1316+
function(x) {
1317+
colList <- as.list(c(columns(x)))
1318+
sdf <- callJMethod(x@sdf, "describe", listToSeq(colList))
1319+
dataFrame(sdf)
1320+
})

0 commit comments

Comments
 (0)