Skip to content

Commit dbe3cff

Browse files
committed
Merge remote-tracking branch 'upstream/master' into ldaonline
2 parents 15be071 + 4d9e560 commit dbe3cff

File tree

387 files changed

+12998
-3829
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

387 files changed

+12998
-3829
lines changed

CONTRIBUTING.md

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
## Contributing to Spark
22

3-
Contributions via GitHub pull requests are gladly accepted from their original
4-
author. Along with any pull requests, please state that the contribution is
5-
your original work and that you license the work to the project under the
6-
project's open source license. Whether or not you state this explicitly, by
7-
submitting any copyrighted material via pull request, email, or other means
8-
you agree to license the material under the project's open source license and
9-
warrant that you have the legal authority to do so.
3+
*Before opening a pull request*, review the
4+
[Contributing to Spark wiki](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark).
5+
It lists steps that are required before creating a PR. In particular, consider:
6+
7+
- Is the change important and ready enough to ask the community to spend time reviewing?
8+
- Have you searched for existing, related JIRAs and pull requests?
9+
- Is this a new feature that can stand alone as a package on http://spark-packages.org ?
10+
- Is the change being proposed clearly explained and motivated?
1011

11-
Please see the [Contributing to Spark wiki page](https://cwiki.apache.org/SPARK/Contributing+to+Spark)
12-
for more information.
12+
When you contribute code, you affirm that the contribution is your original work and that you
13+
license the work to the project under the project's open source license. Whether or not you
14+
state this explicitly, by submitting any copyrighted material via pull request, email, or
15+
other means you agree to license the material under the project's open source license and
16+
warrant that you have the legal authority to do so.

R/pkg/DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ Collate:
1919
'jobj.R'
2020
'RDD.R'
2121
'pairRDD.R'
22-
'SQLTypes.R'
22+
'schema.R'
2323
'column.R'
2424
'group.R'
2525
'DataFrame.R'

R/pkg/NAMESPACE

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ exportMethods(
55
"aggregateByKey",
66
"aggregateRDD",
77
"cache",
8+
"cartesian",
89
"checkpoint",
910
"coalesce",
1011
"cogroup",
@@ -28,6 +29,7 @@ exportMethods(
2829
"fullOuterJoin",
2930
"glom",
3031
"groupByKey",
32+
"intersection",
3133
"join",
3234
"keyBy",
3335
"keys",
@@ -52,11 +54,14 @@ exportMethods(
5254
"reduceByKeyLocally",
5355
"repartition",
5456
"rightOuterJoin",
57+
"sampleByKey",
5558
"sampleRDD",
5659
"saveAsTextFile",
5760
"saveAsObjectFile",
5861
"sortBy",
5962
"sortByKey",
63+
"subtract",
64+
"subtractByKey",
6065
"sumRDD",
6166
"take",
6267
"takeOrdered",
@@ -66,6 +71,7 @@ exportMethods(
6671
"unpersist",
6772
"value",
6873
"values",
74+
"zipPartitions",
6975
"zipRDD",
7076
"zipWithIndex",
7177
"zipWithUniqueId"
@@ -95,6 +101,7 @@ exportClasses("DataFrame")
95101
exportMethods("columns",
96102
"distinct",
97103
"dtypes",
104+
"except",
98105
"explain",
99106
"filter",
100107
"groupBy",
@@ -118,7 +125,6 @@ exportMethods("columns",
118125
"show",
119126
"showDF",
120127
"sortDF",
121-
"subtract",
122128
"toJSON",
123129
"toRDD",
124130
"unionAll",
@@ -178,5 +184,14 @@ export("cacheTable",
178184
"toDF",
179185
"uncacheTable")
180186

181-
export("print.structType",
182-
"print.structField")
187+
export("sparkRSQL.init",
188+
"sparkRHive.init")
189+
190+
export("structField",
191+
"structField.jobj",
192+
"structField.character",
193+
"print.structField",
194+
"structType",
195+
"structType.jobj",
196+
"structType.structField",
197+
"print.structType")

R/pkg/R/DataFrame.R

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
# DataFrame.R - DataFrame class and methods implemented in S4 OO classes
1919

20-
#' @include generics.R jobj.R SQLTypes.R RDD.R pairRDD.R column.R group.R
20+
#' @include generics.R jobj.R schema.R RDD.R pairRDD.R column.R group.R
2121
NULL
2222

2323
setOldClass("jobj")
@@ -790,9 +790,12 @@ setMethod("$", signature(x = "DataFrame"),
790790

791791
setMethod("$<-", signature(x = "DataFrame"),
792792
function(x, name, value) {
793-
stopifnot(class(value) == "Column")
793+
stopifnot(class(value) == "Column" || is.null(value))
794794
cols <- columns(x)
795795
if (name %in% cols) {
796+
if (is.null(value)) {
797+
cols <- Filter(function(c) { c != name }, cols)
798+
}
796799
cols <- lapply(cols, function(c) {
797800
if (c == name) {
798801
alias(value, name)
@@ -802,6 +805,9 @@ setMethod("$<-", signature(x = "DataFrame"),
802805
})
803806
nx <- select(x, cols)
804807
} else {
808+
if (is.null(value)) {
809+
return(x)
810+
}
805811
nx <- withColumn(x, name, value)
806812
}
807813
x@sdf <- nx@sdf
@@ -1141,29 +1147,31 @@ setMethod("intersect",
11411147
dataFrame(intersected)
11421148
})
11431149

1144-
#' Subtract
1150+
#' except
11451151
#'
11461152
#' Return a new DataFrame containing rows in this DataFrame
11471153
#' but not in another DataFrame. This is equivalent to `EXCEPT` in SQL.
11481154
#'
11491155
#' @param x A Spark DataFrame
11501156
#' @param y A Spark DataFrame
1151-
#' @return A DataFrame containing the result of the subtract operation.
1152-
#' @rdname subtract
1157+
#' @return A DataFrame containing the result of the except operation.
1158+
#' @rdname except
11531159
#' @export
11541160
#' @examples
11551161
#'\dontrun{
11561162
#' sc <- sparkR.init()
11571163
#' sqlCtx <- sparkRSQL.init(sc)
11581164
#' df1 <- jsonFile(sqlCtx, path)
11591165
#' df2 <- jsonFile(sqlCtx, path2)
1160-
#' subtractDF <- subtract(df, df2)
1166+
#' exceptDF <- except(df, df2)
11611167
#' }
1162-
setMethod("subtract",
1168+
#' @rdname except
1169+
#' @export
1170+
setMethod("except",
11631171
signature(x = "DataFrame", y = "DataFrame"),
11641172
function(x, y) {
1165-
subtracted <- callJMethod(x@sdf, "except", y@sdf)
1166-
dataFrame(subtracted)
1173+
excepted <- callJMethod(x@sdf, "except", y@sdf)
1174+
dataFrame(excepted)
11671175
})
11681176

11691177
#' Save the contents of the DataFrame to a data source

0 commit comments

Comments (0)