Skip to content

Commit 962692b

Browse files
committed
resolve merge conflicts
2 parents 093bbe2 + a140dd7 commit 962692b

File tree

460 files changed

+10714
-7197
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

460 files changed

+10714
-7197
lines changed

R/pkg/NAMESPACE

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,12 @@ exportMethods("arrange",
4747
"join",
4848
"limit",
4949
"merge",
50+
"mutate",
51+
"na.omit",
5052
"names",
5153
"ncol",
5254
"nrow",
5355
"orderBy",
54-
"mutate",
55-
"names",
5656
"persist",
5757
"printSchema",
5858
"rbind",
@@ -69,9 +69,11 @@ exportMethods("arrange",
6969
"selectExpr",
7070
"show",
7171
"showDF",
72+
"subset",
7273
"summarize",
7374
"summary",
7475
"take",
76+
"transform",
7577
"unionAll",
7678
"unique",
7779
"unpersist",
@@ -82,7 +84,8 @@ exportMethods("arrange",
8284

8385
exportClasses("Column")
8486

85-
exportMethods("abs",
87+
exportMethods("%in%",
88+
"abs",
8689
"acos",
8790
"add_months",
8891
"alias",

R/pkg/R/DataFrame.R

Lines changed: 75 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ setMethod("names<-",
271271
signature(x = "DataFrame"),
272272
function(x, value) {
273273
if (!is.null(value)) {
274-
sdf <- callJMethod(x@sdf, "toDF", listToSeq(as.list(value)))
274+
sdf <- callJMethod(x@sdf, "toDF", as.list(value))
275275
dataFrame(sdf)
276276
}
277277
})
@@ -661,15 +661,15 @@ setMethod("collect",
661661
# listCols is a list of columns
662662
listCols <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "dfToCols", x@sdf)
663663
stopifnot(length(listCols) == ncol)
664-
664+
665665
# An empty data.frame with 0 columns and number of rows as collected
666666
nrow <- length(listCols[[1]])
667667
if (nrow <= 0) {
668668
df <- data.frame()
669669
} else {
670-
df <- data.frame(row.names = 1 : nrow)
670+
df <- data.frame(row.names = 1 : nrow)
671671
}
672-
672+
673673
# Append columns one by one
674674
for (colIndex in 1 : ncol) {
675675
# Note: appending a column of list type into a data.frame so that
@@ -683,7 +683,7 @@ setMethod("collect",
683683
# TODO: more robust check on column of primitive types
684684
vec <- do.call(c, col)
685685
if (class(vec) != "list") {
686-
df[[names[colIndex]]] <- vec
686+
df[[names[colIndex]]] <- vec
687687
} else {
688688
# For columns of complex type, be careful to access them.
689689
# Get a column of complex type returns a list.
@@ -843,10 +843,10 @@ setMethod("groupBy",
843843
function(x, ...) {
844844
cols <- list(...)
845845
if (length(cols) >= 1 && class(cols[[1]]) == "character") {
846-
sgd <- callJMethod(x@sdf, "groupBy", cols[[1]], listToSeq(cols[-1]))
846+
sgd <- callJMethod(x@sdf, "groupBy", cols[[1]], cols[-1])
847847
} else {
848848
jcol <- lapply(cols, function(c) { c@jc })
849-
sgd <- callJMethod(x@sdf, "groupBy", listToSeq(jcol))
849+
sgd <- callJMethod(x@sdf, "groupBy", jcol)
850850
}
851851
groupedData(sgd)
852852
})
@@ -987,7 +987,7 @@ setMethod("$<-", signature(x = "DataFrame"),
987987

988988
setClassUnion("numericOrcharacter", c("numeric", "character"))
989989

990-
#' @rdname select
990+
#' @rdname subset
991991
#' @name [[
992992
setMethod("[[", signature(x = "DataFrame", i = "numericOrcharacter"),
993993
function(x, i) {
@@ -998,7 +998,7 @@ setMethod("[[", signature(x = "DataFrame", i = "numericOrcharacter"),
998998
getColumn(x, i)
999999
})
10001000

1001-
#' @rdname select
1001+
#' @rdname subset
10021002
#' @name [
10031003
setMethod("[", signature(x = "DataFrame", i = "missing"),
10041004
function(x, i, j, ...) {
@@ -1012,20 +1012,51 @@ setMethod("[", signature(x = "DataFrame", i = "missing"),
10121012
select(x, j)
10131013
})
10141014

1015-
#' @rdname select
1015+
#' @rdname subset
10161016
#' @name [
10171017
setMethod("[", signature(x = "DataFrame", i = "Column"),
          function(x, i, j, ...) {
            # Rows are restricted first by applying the Column condition `i`.
            # `i` is only accepted as a Column here: it could handle "character"
            # but that seems confusing and is not required, see
            # https://stat.ethz.ch/R-manual/R-devel/library/base/html/Extract.data.frame.html
            rows <- filter(x, i)
            if (missing(j)) {
              # No column selection requested: hand back the filtered rows as-is.
              return(rows)
            }
            # Column selection reuses `[` with the row index left empty; any
            # extra arguments are forwarded unchanged.
            rows[, j, ...]
          })
10281028

1029+
#' Subset
#'
#' Return subsets of DataFrame according to given conditions.
#' The call is forwarded to the `[` operator, i.e. it evaluates
#' \code{x[subset, select, ...]}.
#'
#' @param x A DataFrame
#' @param subset A logical expression to filter on rows; used as the row index of `[`
#' @param select expression for the single Column or a list of columns to select from
#'        the DataFrame; used as the column index of `[`
#' @param ... further arguments forwarded unchanged to `[`
#' @return A new DataFrame containing only the rows that meet the condition with selected columns
#' @export
#' @rdname subset
#' @name subset
#' @aliases [
#' @family subsetting functions
#' @examples
#' \dontrun{
#'   # Columns can be selected using `[[` and `[`
#'   df[[2]] == df[["age"]]
#'   df[,2] == df[,"age"]
#'   df[,c("name", "age")]
#'   # Or to filter rows
#'   df[df$age > 20,]
#'   # DataFrame can be subset on both rows and Columns
#'   df[df$name == "Smith", c(1,2)]
#'   df[df$age %in% c(19, 30), 1:2]
#'   subset(df, df$age %in% c(19, 30), 1:2)
#'   subset(df, df$age %in% c(19), select = c(1,2))
#' }
setMethod("subset", signature(x = "DataFrame"),
          function(x, subset, select, ...) {
            # Row filtering and column selection are both handled by `[`;
            # the arguments are passed through without being evaluated here.
            x[subset, select, ...]
          })
1059+
10291060
#' Select
10301061
#'
10311062
#' Selects a set of columns with names or Column expressions.
@@ -1034,26 +1065,21 @@ setMethod("[", signature(x = "DataFrame", i = "Column"),
10341065
#' @return A new DataFrame with selected columns
10351066
#' @export
10361067
#' @rdname select
1068+
#' @name select
1069+
#' @family subsetting functions
10371070
#' @examples
10381071
#' \dontrun{
10391072
#' select(df, "*")
10401073
#' select(df, "col1", "col2")
10411074
#' select(df, df$name, df$age + 1)
10421075
#' select(df, c("col1", "col2"))
10431076
#' select(df, list(df$name, df$age + 1))
1044-
#' # Columns can also be selected using `[[` and `[`
1045-
#' df[[2]] == df[["age"]]
1046-
#' df[,2] == df[,"age"]
1047-
#' df[,c("name", "age")]
10481077
#' # Similar to R data frames columns can also be selected using `$`
10491078
#' df$age
1050-
#' # It can also be subset on rows and Columns
1051-
#' df[df$name == "Smith", c(1,2)]
1052-
#' df[df$age %in% c(19, 30), 1:2]
10531079
#' }
10541080
setMethod("select", signature(x = "DataFrame", col = "character"),
          function(x, col, ...) {
            # The first column name is passed on its own and the remaining
            # arguments as an R list to the "select" method of the object in
            # x@sdf; the returned handle is wrapped by dataFrame().
            dataFrame(callJMethod(x@sdf, "select", col, list(...)))
          })
10591085

@@ -1064,7 +1090,7 @@ setMethod("select", signature(x = "DataFrame", col = "Column"),
10641090
jcols <- lapply(list(col, ...), function(c) {
10651091
c@jc
10661092
})
1067-
sdf <- callJMethod(x@sdf, "select", listToSeq(jcols))
1093+
sdf <- callJMethod(x@sdf, "select", jcols)
10681094
dataFrame(sdf)
10691095
})
10701096

@@ -1080,7 +1106,7 @@ setMethod("select",
10801106
col(c)@jc
10811107
}
10821108
})
1083-
sdf <- callJMethod(x@sdf, "select", listToSeq(cols))
1109+
sdf <- callJMethod(x@sdf, "select", cols)
10841110
dataFrame(sdf)
10851111
})
10861112

@@ -1107,7 +1133,7 @@ setMethod("selectExpr",
11071133
signature(x = "DataFrame", expr = "character"),
11081134
function(x, expr, ...) {
11091135
exprList <- list(expr, ...)
1110-
sdf <- callJMethod(x@sdf, "selectExpr", listToSeq(exprList))
1136+
sdf <- callJMethod(x@sdf, "selectExpr", exprList)
11111137
dataFrame(sdf)
11121138
})
11131139

@@ -1121,7 +1147,7 @@ setMethod("selectExpr",
11211147
#' @return A DataFrame with the new column added.
11221148
#' @rdname withColumn
11231149
#' @name withColumn
1124-
#' @aliases mutate
1150+
#' @aliases mutate transform
11251151
#' @export
11261152
#' @examples
11271153
#'\dontrun{
@@ -1141,11 +1167,12 @@ setMethod("withColumn",
11411167
#'
11421168
#' Return a new DataFrame with the specified columns added.
11431169
#'
1144-
#' @param x A DataFrame
1170+
#' @param .data A DataFrame
11451171
#' @param col a named argument of the form name = col
11461172
#' @return A new DataFrame with the new columns added.
11471173
#' @rdname withColumn
11481174
#' @name mutate
1175+
#' @aliases withColumn transform
11491176
#' @export
11501177
#' @examples
11511178
#'\dontrun{
@@ -1155,10 +1182,12 @@ setMethod("withColumn",
11551182
#' df <- jsonFile(sqlContext, path)
11561183
#' newDF <- mutate(df, newCol = df$col1 * 5, newCol2 = df$col1 * 2)
11571184
#' names(newDF) # Will contain newCol, newCol2
1185+
#' newDF2 <- transform(df, newCol = df$col1 / 5, newCol2 = df$col1 * 2)
11581186
#' }
11591187
setMethod("mutate",
1160-
signature(x = "DataFrame"),
1161-
function(x, ...) {
1188+
signature(.data = "DataFrame"),
1189+
function(.data, ...) {
1190+
x <- .data
11621191
cols <- list(...)
11631192
stopifnot(length(cols) > 0)
11641193
stopifnot(class(cols[[1]]) == "Column")
@@ -1173,6 +1202,16 @@ setMethod("mutate",
11731202
do.call(select, c(x, x$"*", cols))
11741203
})
11751204

1205+
#' @export
#' @rdname withColumn
#' @name transform
#' @aliases withColumn mutate
setMethod("transform",
          signature(`_data` = "DataFrame"),
          function(`_data`, ...) {
            # transform() is a thin alias here: the DataFrame and every
            # additional argument are handed straight to mutate().
            # NOTE(review): the backquoted `_data` name presumably mirrors the
            # first formal of the transform generic -- confirm.
            mutate(`_data`, ...)
          })
1214+
11761215
#' WithColumnRenamed
11771216
#'
11781217
#' Rename an existing column in a DataFrame.
@@ -1272,12 +1311,12 @@ setMethod("arrange",
12721311
signature(x = "DataFrame", col = "characterOrColumn"),
12731312
function(x, col, ...) {
12741313
if (class(col) == "character") {
1275-
sdf <- callJMethod(x@sdf, "sort", col, toSeq(...))
1314+
sdf <- callJMethod(x@sdf, "sort", col, list(...))
12761315
} else if (class(col) == "Column") {
12771316
jcols <- lapply(list(col, ...), function(c) {
12781317
c@jc
12791318
})
1280-
sdf <- callJMethod(x@sdf, "sort", listToSeq(jcols))
1319+
sdf <- callJMethod(x@sdf, "sort", jcols)
12811320
}
12821321
dataFrame(sdf)
12831322
})
@@ -1300,6 +1339,7 @@ setMethod("orderBy",
13001339
#' @return A DataFrame containing only the rows that meet the condition.
13011340
#' @rdname filter
13021341
#' @name filter
1342+
#' @family subsetting functions
13031343
#' @export
13041344
#' @examples
13051345
#'\dontrun{
@@ -1624,7 +1664,7 @@ setMethod("describe",
16241664
signature(x = "DataFrame", col = "character"),
16251665
function(x, col, ...) {
16261666
colList <- list(col, ...)
1627-
sdf <- callJMethod(x@sdf, "describe", listToSeq(colList))
1667+
sdf <- callJMethod(x@sdf, "describe", colList)
16281668
dataFrame(sdf)
16291669
})
16301670

@@ -1634,7 +1674,7 @@ setMethod("describe",
16341674
signature(x = "DataFrame"),
16351675
function(x) {
16361676
colList <- as.list(c(columns(x)))
1637-
sdf <- callJMethod(x@sdf, "describe", listToSeq(colList))
1677+
sdf <- callJMethod(x@sdf, "describe", colList)
16381678
dataFrame(sdf)
16391679
})
16401680

@@ -1691,17 +1731,17 @@ setMethod("dropna",
16911731

16921732
naFunctions <- callJMethod(x@sdf, "na")
16931733
sdf <- callJMethod(naFunctions, "drop",
1694-
as.integer(minNonNulls), listToSeq(as.list(cols)))
1734+
as.integer(minNonNulls), as.list(cols))
16951735
dataFrame(sdf)
16961736
})
16971737

16981738
#' @rdname nafunctions
16991739
#' @name na.omit
17001740
#' @export
17011741
setMethod("na.omit",
1702-
signature(x = "DataFrame"),
1703-
function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
1704-
dropna(x, how, minNonNulls, cols)
1742+
signature(object = "DataFrame"),
1743+
function(object, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
1744+
dropna(object, how, minNonNulls, cols)
17051745
})
17061746

17071747
#' fillna
@@ -1775,7 +1815,7 @@ setMethod("fillna",
17751815
sdf <- if (length(cols) == 0) {
17761816
callJMethod(naFunctions, "fill", value)
17771817
} else {
1778-
callJMethod(naFunctions, "fill", value, listToSeq(as.list(cols)))
1818+
callJMethod(naFunctions, "fill", value, as.list(cols))
17791819
}
17801820
dataFrame(sdf)
17811821
})

R/pkg/R/SQLContext.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ infer_type <- function(x) {
4949
stopifnot(length(x) > 0)
5050
names <- names(x)
5151
if (is.null(names)) {
52-
list(type = "array", elementType = infer_type(x[[1]]), containsNull = TRUE)
52+
paste0("array<", infer_type(x[[1]]), ">")
5353
} else {
5454
# StructType
5555
types <- lapply(x, infer_type)
@@ -59,7 +59,7 @@ infer_type <- function(x) {
5959
do.call(structType, fields)
6060
}
6161
} else if (length(x) > 1) {
62-
list(type = "array", elementType = type, containsNull = TRUE)
62+
paste0("array<", infer_type(x[[1]]), ">")
6363
} else {
6464
type
6565
}
@@ -201,7 +201,7 @@ setMethod("toDF", signature(x = "RDD"),
201201

202202
jsonFile <- function(sqlContext, path) {
203203
# Allow the user to have a more flexible definition of the text file path
204-
path <- normalizePath(path)
204+
path <- suppressWarnings(normalizePath(path))
205205
# Convert a string vector of paths to a string containing comma separated paths
206206
path <- paste(path, collapse = ",")
207207
sdf <- callJMethod(sqlContext, "jsonFile", path)
@@ -251,7 +251,7 @@ jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) {
251251
# TODO: Implement saveasParquetFile and write examples for both
252252
# Build a DataFrame from one or more Parquet file paths.
#
# sqlContext: the context object whose "parquetFile" method is invoked
# ...:        one or more path arguments; each is normalized individually
# Returns the value of dataFrame() applied to the handle returned by the call.
parquetFile <- function(sqlContext, ...) {
  # Allow the user to have a more flexible definition of the text file path.
  # mustWork = FALSE stops normalizePath() from warning when a path cannot be
  # resolved locally, without muffling any unrelated warning the way a
  # blanket suppressWarnings() would.
  paths <- lapply(list(...), normalizePath, mustWork = FALSE)
  sdf <- callJMethod(sqlContext, "parquetFile", paths)
  dataFrame(sdf)
}

R/pkg/R/column.R

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -211,8 +211,7 @@ setMethod("cast",
211211
setMethod("%in%",
          signature(x = "Column"),
          function(x, table) {
            # `table` is converted to an R list and passed to the "in" method
            # of the object held in x@jc; the result is wrapped by column().
            column(callJMethod(x@jc, "in", as.list(table)))
          })
218217

0 commit comments

Comments
 (0)