Skip to content

Commit 6035648

Browse files
committed
rebase with upstream
2 parents a81d0fc + 871764c commit 6035648

File tree

255 files changed

+5407
-2341
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

255 files changed

+5407
-2341
lines changed

R/pkg/NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,11 @@ exportMethods("arrange",
6969
"selectExpr",
7070
"show",
7171
"showDF",
72+
"subset",
7273
"summarize",
7374
"summary",
7475
"take",
76+
"transform",
7577
"unionAll",
7678
"unique",
7779
"unpersist",

R/pkg/R/DataFrame.R

Lines changed: 58 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -987,7 +987,7 @@ setMethod("$<-", signature(x = "DataFrame"),
987987

988988
setClassUnion("numericOrcharacter", c("numeric", "character"))
989989

990-
#' @rdname select
990+
#' @rdname subset
991991
#' @name [[
992992
setMethod("[[", signature(x = "DataFrame", i = "numericOrcharacter"),
993993
function(x, i) {
@@ -998,7 +998,7 @@ setMethod("[[", signature(x = "DataFrame", i = "numericOrcharacter"),
998998
getColumn(x, i)
999999
})
10001000

1001-
#' @rdname select
1001+
#' @rdname subset
10021002
#' @name [
10031003
setMethod("[", signature(x = "DataFrame", i = "missing"),
10041004
function(x, i, j, ...) {
@@ -1012,20 +1012,51 @@ setMethod("[", signature(x = "DataFrame", i = "missing"),
10121012
select(x, j)
10131013
})
10141014

1015-
#' @rdname select
1015+
#' @rdname subset
10161016
#' @name [
10171017
setMethod("[", signature(x = "DataFrame", i = "Column"),
10181018
function(x, i, j, ...) {
10191019
# It could handle i as "character" but it seems confusing and not required
10201020
# https://stat.ethz.ch/R-manual/R-devel/library/base/html/Extract.data.frame.html
10211021
filtered <- filter(x, i)
10221022
if (!missing(j)) {
1023-
filtered[, j]
1023+
filtered[, j, ...]
10241024
} else {
10251025
filtered
10261026
}
10271027
})
10281028

1029+
#' Subset
1030+
#'
1031+
#' Return subsets of DataFrame according to given conditions
1032+
#' @param x A DataFrame
1033+
#' @param subset A logical expression to filter on rows
1034+
#' @param select expression for the single Column or a list of columns to select from the DataFrame
1035+
#' @return A new DataFrame containing only the rows that meet the condition with selected columns
1036+
#' @export
1037+
#' @rdname subset
1038+
#' @name subset
1039+
#' @aliases [
1040+
#' @family subsetting functions
1041+
#' @examples
1042+
#' \dontrun{
1043+
#' # Columns can be selected using `[[` and `[`
1044+
#' df[[2]] == df[["age"]]
1045+
#' df[,2] == df[,"age"]
1046+
#' df[,c("name", "age")]
1047+
#' # Or to filter rows
1048+
#' df[df$age > 20,]
1049+
#' # DataFrame can be subset on both rows and Columns
1050+
#' df[df$name == "Smith", c(1,2)]
1051+
#' df[df$age %in% c(19, 30), 1:2]
1052+
#' subset(df, df$age %in% c(19, 30), 1:2)
1053+
#' subset(df, df$age %in% c(19), select = c(1,2))
1054+
#' }
1055+
setMethod("subset", signature(x = "DataFrame"),
1056+
function(x, subset, select, ...) {
1057+
x[subset, select, ...]
1058+
})
1059+
10291060
#' Select
10301061
#'
10311062
#' Selects a set of columns with names or Column expressions.
@@ -1034,22 +1065,17 @@ setMethod("[", signature(x = "DataFrame", i = "Column"),
10341065
#' @return A new DataFrame with selected columns
10351066
#' @export
10361067
#' @rdname select
1068+
#' @name select
1069+
#' @family subsetting functions
10371070
#' @examples
10381071
#' \dontrun{
10391072
#' select(df, "*")
10401073
#' select(df, "col1", "col2")
10411074
#' select(df, df$name, df$age + 1)
10421075
#' select(df, c("col1", "col2"))
10431076
#' select(df, list(df$name, df$age + 1))
1044-
#' # Columns can also be selected using `[[` and `[`
1045-
#' df[[2]] == df[["age"]]
1046-
#' df[,2] == df[,"age"]
1047-
#' df[,c("name", "age")]
10481077
#' # Similar to R data frames columns can also be selected using `$`
10491078
#' df$age
1050-
#' # It can also be subset on rows and Columns
1051-
#' df[df$name == "Smith", c(1,2)]
1052-
#' df[df$age %in% c(19, 30), 1:2]
10531079
#' }
10541080
setMethod("select", signature(x = "DataFrame", col = "character"),
10551081
function(x, col, ...) {
@@ -1121,7 +1147,7 @@ setMethod("selectExpr",
11211147
#' @return A DataFrame with the new column added.
11221148
#' @rdname withColumn
11231149
#' @name withColumn
1124-
#' @aliases mutate
1150+
#' @aliases mutate transform
11251151
#' @export
11261152
#' @examples
11271153
#'\dontrun{
@@ -1141,11 +1167,12 @@ setMethod("withColumn",
11411167
#'
11421168
#' Return a new DataFrame with the specified columns added.
11431169
#'
1144-
#' @param x A DataFrame
1170+
#' @param .data A DataFrame
11451171
#' @param col a named argument of the form name = col
11461172
#' @return A new DataFrame with the new columns added.
11471173
#' @rdname withColumn
11481174
#' @name mutate
1175+
#' @aliases withColumn transform
11491176
#' @export
11501177
#' @examples
11511178
#'\dontrun{
@@ -1155,10 +1182,12 @@ setMethod("withColumn",
11551182
#' df <- jsonFile(sqlContext, path)
11561183
#' newDF <- mutate(df, newCol = df$col1 * 5, newCol2 = df$col1 * 2)
11571184
#' names(newDF) # Will contain newCol, newCol2
1185+
#' newDF2 <- transform(df, newCol = df$col1 / 5, newCol2 = df$col1 * 2)
11581186
#' }
11591187
setMethod("mutate",
1160-
signature(x = "DataFrame"),
1161-
function(x, ...) {
1188+
signature(.data = "DataFrame"),
1189+
function(.data, ...) {
1190+
x <- .data
11621191
cols <- list(...)
11631192
stopifnot(length(cols) > 0)
11641193
stopifnot(class(cols[[1]]) == "Column")
@@ -1173,6 +1202,16 @@ setMethod("mutate",
11731202
do.call(select, c(x, x$"*", cols))
11741203
})
11751204

1205+
#' @export
1206+
#' @rdname withColumn
1207+
#' @name transform
1208+
#' @aliases withColumn mutate
1209+
setMethod("transform",
1210+
signature(`_data` = "DataFrame"),
1211+
function(`_data`, ...) {
1212+
mutate(`_data`, ...)
1213+
})
1214+
11761215
#' WithColumnRenamed
11771216
#'
11781217
#' Rename an existing column in a DataFrame.
@@ -1300,6 +1339,7 @@ setMethod("orderBy",
13001339
#' @return A DataFrame containing only the rows that meet the condition.
13011340
#' @rdname filter
13021341
#' @name filter
1342+
#' @family subsetting functions
13031343
#' @export
13041344
#' @examples
13051345
#'\dontrun{
@@ -1699,9 +1739,9 @@ setMethod("dropna",
16991739
#' @name na.omit
17001740
#' @export
17011741
setMethod("na.omit",
1702-
signature(x = "DataFrame"),
1703-
function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
1704-
dropna(x, how, minNonNulls, cols)
1742+
signature(object = "DataFrame"),
1743+
function(object, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
1744+
dropna(object, how, minNonNulls, cols)
17051745
})
17061746

17071747
#' fillna

R/pkg/R/SQLContext.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ setMethod("toDF", signature(x = "RDD"),
201201

202202
jsonFile <- function(sqlContext, path) {
203203
# Allow the user to have a more flexible definition of the text file path
204-
path <- normalizePath(path)
204+
path <- suppressWarnings(normalizePath(path))
205205
# Convert a string vector of paths to a string containing comma separated paths
206206
path <- paste(path, collapse = ",")
207207
sdf <- callJMethod(sqlContext, "jsonFile", path)
@@ -251,7 +251,7 @@ jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) {
251251
# TODO: Implement saveasParquetFile and write examples for both
252252
parquetFile <- function(sqlContext, ...) {
253253
# Allow the user to have a more flexible definition of the text file path
254-
paths <- lapply(list(...), normalizePath)
254+
paths <- lapply(list(...), function(x) suppressWarnings(normalizePath(x)))
255255
sdf <- callJMethod(sqlContext, "parquetFile", paths)
256256
dataFrame(sdf)
257257
}

R/pkg/R/deserialize.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,10 @@ readTypedObject <- function(con, type) {
5757

5858
readString <- function(con) {
5959
stringLen <- readInt(con)
60-
string <- readBin(con, raw(), stringLen, endian = "big")
61-
rawToChar(string)
60+
raw <- readBin(con, raw(), stringLen, endian = "big")
61+
string <- rawToChar(raw)
62+
Encoding(string) <- "UTF-8"
63+
string
6264
}
6365

6466
readInt <- function(con) {

R/pkg/R/generics.R

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@ setGeneric("dropna",
413413
#' @rdname nafunctions
414414
#' @export
415415
setGeneric("na.omit",
416-
function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
416+
function(object, ...) {
417417
standardGeneric("na.omit")
418418
})
419419

@@ -467,7 +467,7 @@ setGeneric("merge")
467467

468468
#' @rdname withColumn
469469
#' @export
470-
setGeneric("mutate", function(x, ...) {standardGeneric("mutate") })
470+
setGeneric("mutate", function(.data, ...) {standardGeneric("mutate") })
471471

472472
#' @rdname arrange
473473
#' @export
@@ -507,6 +507,10 @@ setGeneric("saveAsTable", function(df, tableName, source, mode, ...) {
507507
standardGeneric("saveAsTable")
508508
})
509509

510+
#' @rdname withColumn
511+
#' @export
512+
setGeneric("transform", function(`_data`, ...) {standardGeneric("transform") })
513+
510514
#' @rdname write.df
511515
#' @export
512516
setGeneric("write.df", function(df, path, ...) { standardGeneric("write.df") })
@@ -531,6 +535,10 @@ setGeneric("selectExpr", function(x, expr, ...) { standardGeneric("selectExpr")
531535
#' @export
532536
setGeneric("showDF", function(x,...) { standardGeneric("showDF") })
533537

538+
#' @rdname subset
539+
#' @export
540+
setGeneric("subset", function(x, subset, select, ...) { standardGeneric("subset") })
541+
534542
#' @rdname agg
535543
#' @export
536544
setGeneric("summarize", function(x,...) { standardGeneric("summarize") })

R/pkg/R/serialize.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ writeJobj <- function(con, value) {
7979
writeString <- function(con, value) {
8080
utfVal <- enc2utf8(value)
8181
writeInt(con, as.integer(nchar(utfVal, type = "bytes") + 1))
82-
writeBin(utfVal, con, endian = "big")
82+
writeBin(utfVal, con, endian = "big", useBytes=TRUE)
8383
}
8484

8585
writeInt <- function(con, value) {

R/pkg/R/sparkR.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ sparkR.init <- function(
160160
})
161161

162162
if (nchar(sparkHome) != 0) {
163-
sparkHome <- normalizePath(sparkHome)
163+
sparkHome <- suppressWarnings(normalizePath(sparkHome))
164164
}
165165

166166
sparkEnvirMap <- new.env()

0 commit comments

Comments
 (0)