apache
diff --git a/‎R/pkg/NAMESPACE
Lines changed: 2 additions & 0 deletions b/‎R/pkg/NAMESPACE
Lines changed: 2 additions & 0 deletions
diff --git a/‎R/pkg/R/DataFrame.R
Lines changed: 58 additions & 18 deletions b/‎R/pkg/R/DataFrame.R
Lines changed: 58 additions & 18 deletions
diff --git a/‎R/pkg/R/SQLContext.R
Lines changed: 2 additions & 2 deletions b/‎R/pkg/R/SQLContext.R
Lines changed: 2 additions & 2 deletions
diff --git a/‎R/pkg/R/deserialize.R
Lines changed: 4 additions & 2 deletions b/‎R/pkg/R/deserialize.R
Lines changed: 4 additions & 2 deletions
diff --git a/‎R/pkg/R/generics.R
Lines changed: 10 additions & 2 deletions b/‎R/pkg/R/generics.R
Lines changed: 10 additions & 2 deletions
diff --git a/‎R/pkg/R/serialize.R
Lines changed: 1 addition & 1 deletion b/‎R/pkg/R/serialize.R
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/pkg/R/sparkR.R
Lines changed: 1 addition & 1 deletion b/‎R/pkg/R/sparkR.R
Lines changed: 1 addition & 1 deletion
@@ -69,9 +69,11 @@ exportMethods("arrange",
               "selectExpr",
               "show",
               "showDF",
+              "subset",
               "summarize",
               "summary",
               "take",
+              "transform",
               "unionAll",
               "unique",
               "unpersist",
 
@@ -987,7 +987,7 @@ setMethod("$<-", signature(x = "DataFrame"),
 
 setClassUnion("numericOrcharacter", c("numeric", "character"))
 
-#' @rdname select
+#' @rdname subset
 #' @name [[
 setMethod("[[", signature(x = "DataFrame", i = "numericOrcharacter"),
           function(x, i) {
@@ -998,7 +998,7 @@ setMethod("[[", signature(x = "DataFrame", i = "numericOrcharacter"),
             getColumn(x, i)
           })
 
-#' @rdname select
+#' @rdname subset
 #' @name [
 setMethod("[", signature(x = "DataFrame", i = "missing"),
           function(x, i, j, ...) {
@@ -1012,20 +1012,51 @@ setMethod("[", signature(x = "DataFrame", i = "missing"),
             select(x, j)
           })
 
-#' @rdname select
+#' @rdname subset
 #' @name [
 setMethod("[", signature(x = "DataFrame", i = "Column"),
           function(x, i, j, ...) {
             # It could handle i as "character" but it seems confusing and not required
             # https://stat.ethz.ch/R-manual/R-devel/library/base/html/Extract.data.frame.html
             filtered <- filter(x, i)
             if (!missing(j)) {
-              filtered[, j]
+              filtered[, j, ...]
             } else {
               filtered
             }
           })
 
+#' Subset
+#'
+#' Return subsets of DataFrame according to given conditions
+#' @param x A DataFrame
+#' @param subset A logical expression to filter on rows
+#' @param select expression for the single Column or a list of columns to select from the DataFrame
+#' @return A new DataFrame containing only the rows that meet the condition with selected columns
+#' @export
+#' @rdname subset
+#' @name subset
+#' @aliases [
+#' @family subsetting functions
+#' @examples
+#' \dontrun{
+#'   # Columns can be selected using `[[` and `[`
+#'   df[[2]] == df[["age"]]
+#'   df[,2] == df[,"age"]
+#'   df[,c("name", "age")]
+#'   # Or to filter rows
+#'   df[df$age > 20,]
+#'   # DataFrame can be subset on both rows and Columns
+#'   df[df$name == "Smith", c(1,2)]
+#'   df[df$age %in% c(19, 30), 1:2]
+#'   subset(df, df$age %in% c(19, 30), 1:2)
+#'   subset(df, df$age %in% c(19), select = c(1,2))
+#' }
+setMethod("subset", signature(x = "DataFrame"),
+          function(x, subset, select, ...) {
+            x[subset, select, ...]
+          })
+
 #' Select
 #'
 #' Selects a set of columns with names or Column expressions.
@@ -1034,22 +1065,17 @@ setMethod("[", signature(x = "DataFrame", i = "Column"),
 #' @return A new DataFrame with selected columns
 #' @export
 #' @rdname select
+#' @name select
+#' @family subsetting functions
 #' @examples
 #' \dontrun{
 #'   select(df, "*")
 #'   select(df, "col1", "col2")
 #'   select(df, df$name, df$age + 1)
 #'   select(df, c("col1", "col2"))
 #'   select(df, list(df$name, df$age + 1))
-#'   # Columns can also be selected using `[[` and `[`
-#'   df[[2]] == df[["age"]]
-#'   df[,2] == df[,"age"]
-#'   df[,c("name", "age")]
 #'   # Similar to R data frames columns can also be selected using `$`
 #'   df$age
-#'   # It can also be subset on rows and Columns
-#'   df[df$name == "Smith", c(1,2)]
-#'   df[df$age %in% c(19, 30), 1:2]
 #' }
 setMethod("select", signature(x = "DataFrame", col = "character"),
           function(x, col, ...) {
@@ -1121,7 +1147,7 @@ setMethod("selectExpr",
 #' @return A DataFrame with the new column added.
 #' @rdname withColumn
 #' @name withColumn
-#' @aliases mutate
+#' @aliases mutate transform
 #' @export
 #' @examples
 #'\dontrun{
@@ -1141,11 +1167,12 @@ setMethod("withColumn",
 #'
 #' Return a new DataFrame with the specified columns added.
 #'
-#' @param x A DataFrame
+#' @param .data A DataFrame
 #' @param col a named argument of the form name = col
 #' @return A new DataFrame with the new columns added.
 #' @rdname withColumn
 #' @name mutate
+#' @aliases withColumn transform
 #' @export
 #' @examples
 #'\dontrun{
@@ -1155,10 +1182,12 @@ setMethod("withColumn",
 #' df <- jsonFile(sqlContext, path)
 #' newDF <- mutate(df, newCol = df$col1 * 5, newCol2 = df$col1 * 2)
 #' names(newDF) # Will contain newCol, newCol2
+#' newDF2 <- transform(df, newCol = df$col1 / 5, newCol2 = df$col1 * 2)
 #' }
 setMethod("mutate",
-          signature(x = "DataFrame"),
-          function(x, ...) {
+          signature(.data = "DataFrame"),
+          function(.data, ...) {
+            x <- .data
             cols <- list(...)
             stopifnot(length(cols) > 0)
             stopifnot(class(cols[[1]]) == "Column")
@@ -1173,6 +1202,16 @@ setMethod("mutate",
             do.call(select, c(x, x$"*", cols))
           })
 
+#' @export
+#' @rdname withColumn
+#' @name transform
+#' @aliases withColumn mutate
+setMethod("transform",
+          signature(`_data` = "DataFrame"),
+          function(`_data`, ...) {
+            mutate(`_data`, ...)
+          })
+
 #' WithColumnRenamed
 #'
 #' Rename an existing column in a DataFrame.
@@ -1300,6 +1339,7 @@ setMethod("orderBy",
 #' @return A DataFrame containing only the rows that meet the condition.
 #' @rdname filter
 #' @name filter
+#' @family subsetting functions
 #' @export
 #' @examples
 #'\dontrun{
@@ -1699,9 +1739,9 @@ setMethod("dropna",
 #' @name na.omit
 #' @export
 setMethod("na.omit",
-          signature(x = "DataFrame"),
-          function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
-            dropna(x, how, minNonNulls, cols)
+          signature(object = "DataFrame"),
+          function(object, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
+            dropna(object, how, minNonNulls, cols)
           })
 
 #' fillna
 
@@ -201,7 +201,7 @@ setMethod("toDF", signature(x = "RDD"),
 
 jsonFile <- function(sqlContext, path) {
   # Allow the user to have a more flexible definiton of the text file path
-  path <- normalizePath(path)
+  path <- suppressWarnings(normalizePath(path))
   # Convert a string vector of paths to a string containing comma separated paths
   path <- paste(path, collapse = ",")
   sdf <- callJMethod(sqlContext, "jsonFile", path)
@@ -251,7 +251,7 @@ jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) {
 # TODO: Implement saveasParquetFile and write examples for both
 parquetFile <- function(sqlContext, ...) {
   # Allow the user to have a more flexible definiton of the text file path
-  paths <- lapply(list(...), normalizePath)
+  paths <- lapply(list(...), function(x) suppressWarnings(normalizePath(x)))
   sdf <- callJMethod(sqlContext, "parquetFile", paths)
   dataFrame(sdf)
 }
 
@@ -57,8 +57,10 @@ readTypedObject <- function(con, type) {
 
 readString <- function(con) {
   stringLen <- readInt(con)
-  string <- readBin(con, raw(), stringLen, endian = "big")
-  rawToChar(string)
+  raw <- readBin(con, raw(), stringLen, endian = "big")
+  string <- rawToChar(raw)
+  Encoding(string) <- "UTF-8"
+  string
 }
 
 readInt <- function(con) {
 
@@ -413,7 +413,7 @@ setGeneric("dropna",
 #' @rdname nafunctions
 #' @export
 setGeneric("na.omit",
-           function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
+           function(object, ...) {
              standardGeneric("na.omit")
            })
 
@@ -467,7 +467,7 @@ setGeneric("merge")
 
 #' @rdname withColumn
 #' @export
-setGeneric("mutate", function(x, ...) {standardGeneric("mutate") })
+setGeneric("mutate", function(.data, ...) {standardGeneric("mutate") })
 
 #' @rdname arrange
 #' @export
@@ -507,6 +507,10 @@ setGeneric("saveAsTable", function(df, tableName, source, mode, ...) {
   standardGeneric("saveAsTable")
 })
 
+#' @rdname withColumn
+#' @export
+setGeneric("transform", function(`_data`, ...) {standardGeneric("transform") })
+
 #' @rdname write.df
 #' @export
 setGeneric("write.df", function(df, path, ...) { standardGeneric("write.df") })
@@ -531,6 +535,10 @@ setGeneric("selectExpr", function(x, expr, ...) { standardGeneric("selectExpr")
 #' @export
 setGeneric("showDF", function(x,...) { standardGeneric("showDF") })
 
+# @rdname subset
+# @export
+setGeneric("subset", function(x, subset, select, ...) { standardGeneric("subset") })
+
 #' @rdname agg
 #' @export
 setGeneric("summarize", function(x,...) { standardGeneric("summarize") })
 
@@ -79,7 +79,7 @@ writeJobj <- function(con, value) {
 writeString <- function(con, value) {
   utfVal <- enc2utf8(value)
   writeInt(con, as.integer(nchar(utfVal, type = "bytes") + 1))
-  writeBin(utfVal, con, endian = "big")
+  writeBin(utfVal, con, endian = "big", useBytes=TRUE)
 }
 
 writeInt <- function(con, value) {
 
@@ -160,7 +160,7 @@ sparkR.init <- function(
   })
 
   if (nchar(sparkHome) != 0) {
-    sparkHome <- normalizePath(sparkHome)
+    sparkHome <- suppressWarnings(normalizePath(sparkHome))
   }
 
   sparkEnvirMap <- new.env()
Original file line number	Diff line number	Diff line change
`@@ -79,7 +79,7 @@ writeJobj <- function(con, value) {`
`79`	`79`	`writeString <- function(con, value) {`
`80`	`80`	`utfVal <- enc2utf8(value)`
`81`	`81`	`writeInt(con, as.integer(nchar(utfVal, type = "bytes") + 1))`
`82`		`- writeBin(utfVal, con, endian = "big")`
	`82`	`+ writeBin(utfVal, con, endian = "big", useBytes=TRUE)`
`83`	`83`	`}`
`84`	`84`
`85`	`85`	`writeInt <- function(con, value) {`
Original file line number	Diff line number	Diff line change
`@@ -160,7 +160,7 @@ sparkR.init <- function(`
`160`	`160`	`})`
`161`	`161`
`162`	`162`	`if (nchar(sparkHome) != 0) {`
`163`		`- sparkHome <- normalizePath(sparkHome)`
	`163`	`+ sparkHome <- suppressWarnings(normalizePath(sparkHome))`
`164`	`164`	`}`
`165`	`165`
`166`	`166`	`sparkEnvirMap <- new.env()`