Skip to content

Commit 3cc9321

Browse files
author
Marcelo Vanzin
committed
Merge branch 'master' into SPARK-10004
2 parents fadff27 + c1840a8 commit 3cc9321

File tree

164 files changed

+3443
-861
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

164 files changed

+3443
-861
lines changed

R/pkg/NAMESPACE

Lines changed: 49 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -87,44 +87,86 @@ exportMethods("abs",
8787
"alias",
8888
"approxCountDistinct",
8989
"asc",
90+
"ascii",
9091
"asin",
9192
"atan",
9293
"atan2",
9394
"avg",
95+
"base64",
9496
"between",
97+
"bin",
98+
"bitwiseNOT",
9599
"cast",
96100
"cbrt",
101+
"ceil",
97102
"ceiling",
103+
"concat",
98104
"contains",
99105
"cos",
100106
"cosh",
107+
"count",
101108
"countDistinct",
109+
"crc32",
110+
"datediff",
111+
"dayofmonth",
112+
"dayofyear",
102113
"desc",
103114
"endsWith",
104115
"exp",
116+
"explode",
105117
"expm1",
118+
"factorial",
119+
"first",
106120
"floor",
107121
"getField",
108122
"getItem",
123+
"greatest",
124+
"hex",
125+
"hour",
109126
"hypot",
127+
"initcap",
128+
"isNaN",
110129
"isNotNull",
111130
"isNull",
112131
"last",
132+
"last_day",
133+
"least",
134+
"length",
135+
"levenshtein",
113136
"like",
137+
"lit",
114138
"log",
115139
"log10",
116140
"log1p",
141+
"log2",
117142
"lower",
143+
"ltrim",
118144
"max",
145+
"md5",
119146
"mean",
120147
"min",
148+
"minute",
149+
"month",
150+
"months_between",
121151
"n",
122152
"n_distinct",
153+
"nanvl",
154+
"negate",
155+
"pmod",
156+
"quarter",
157+
"reverse",
123158
"rint",
124159
"rlike",
160+
"round",
161+
"rtrim",
162+
"second",
163+
"sha1",
125164
"sign",
165+
"signum",
126166
"sin",
127167
"sinh",
168+
"size",
169+
"soundex",
128170
"sqrt",
129171
"startsWith",
130172
"substr",
@@ -134,7 +176,13 @@ exportMethods("abs",
134176
"tanh",
135177
"toDegrees",
136178
"toRadians",
137-
"upper")
179+
"to_date",
180+
"trim",
181+
"unbase64",
182+
"unhex",
183+
"upper",
184+
"weekofyear",
185+
"year")
138186

139187
exportClasses("GroupedData")
140188
exportMethods("agg")

R/pkg/R/deserialize.R

Lines changed: 10 additions & 6 deletions
Original file line number · Diff line number · Diff line change
@@ -176,10 +176,14 @@ readRow <- function(inputCon) {
176176

177177
# Take a single column as Array[Byte] and deserialize it into an atomic vector
178178
readCol <- function(inputCon, numRows) {
179-
# sapply can not work with POSIXlt
180-
do.call(c, lapply(1:numRows, function(x) {
181-
value <- readObject(inputCon)
182-
# Replace NULL with NA so we can coerce to vectors
183-
if (is.null(value)) NA else value
184-
}))
179+
if (numRows > 0) {
180+
# sapply can not work with POSIXlt
181+
do.call(c, lapply(1:numRows, function(x) {
182+
value <- readObject(inputCon)
183+
# Replace NULL with NA so we can coerce to vectors
184+
if (is.null(value)) NA else value
185+
}))
186+
} else {
187+
vector()
188+
}
185189
}

R/pkg/R/functions.R

Lines changed: 42 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -67,6 +67,14 @@ createFunctions <- function() {
6767

6868
createFunctions()
6969

70+
#' @rdname functions
71+
#' @return Creates a Column class of literal value.
72+
setMethod("lit", signature("ANY"),
73+
function(x) {
74+
jc <- callJStatic("org.apache.spark.sql.functions", "lit", ifelse(class(x) == "Column", x@jc, x))
75+
column(jc)
76+
})
77+
7078
#' Approx Count Distinct
7179
#'
7280
#' @rdname functions
@@ -93,6 +101,40 @@ setMethod("countDistinct",
93101
column(jc)
94102
})
95103

104+
#' @rdname functions
105+
#' @return Concatenates multiple input string columns together into a single string column.
106+
setMethod("concat",
107+
signature(x = "Column"),
108+
function(x, ...) {
109+
jcols <- lapply(list(x, ...), function(x) { x@jc })
110+
jc <- callJStatic("org.apache.spark.sql.functions", "concat", listToSeq(jcols))
111+
column(jc)
112+
})
113+
114+
#' @rdname functions
115+
#' @return Returns the greatest value of the list of column names, skipping null values.
116+
#' This function takes at least 2 parameters. It will return null if all parameters are null.
117+
setMethod("greatest",
118+
signature(x = "Column"),
119+
function(x, ...) {
120+
stopifnot(length(list(...)) > 0)
121+
jcols <- lapply(list(x, ...), function(x) { x@jc })
122+
jc <- callJStatic("org.apache.spark.sql.functions", "greatest", listToSeq(jcols))
123+
column(jc)
124+
})
125+
126+
#' @rdname functions
127+
#' @return Returns the least value of the list of column names, skipping null values.
128+
#' This function takes at least 2 parameters. It will return null iff all parameters are null.
129+
setMethod("least",
130+
signature(x = "Column"),
131+
function(x, ...) {
132+
stopifnot(length(list(...)) > 0)
133+
jcols <- lapply(list(x, ...), function(x) { x@jc })
134+
jc <- callJStatic("org.apache.spark.sql.functions", "least", listToSeq(jcols))
135+
column(jc)
136+
})
137+
96138
#' @rdname functions
97139
#' @aliases ceil
98140
setMethod("ceiling",

R/pkg/R/generics.R

Lines changed: 16 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -682,6 +682,10 @@ setGeneric("cbrt", function(x) { standardGeneric("cbrt") })
682682
#' @export
683683
setGeneric("ceil", function(x) { standardGeneric("ceil") })
684684

685+
#' @rdname functions
686+
#' @export
687+
setGeneric("concat", function(x, ...) { standardGeneric("concat") })
688+
685689
#' @rdname functions
686690
#' @export
687691
setGeneric("crc32", function(x) { standardGeneric("crc32") })
@@ -702,6 +706,10 @@ setGeneric("dayofyear", function(x) { standardGeneric("dayofyear") })
702706
#' @export
703707
setGeneric("explode", function(x) { standardGeneric("explode") })
704708

709+
#' @rdname functions
710+
#' @export
711+
setGeneric("greatest", function(x, ...) { standardGeneric("greatest") })
712+
705713
#' @rdname functions
706714
#' @export
707715
setGeneric("hex", function(x) { standardGeneric("hex") })
@@ -722,10 +730,18 @@ setGeneric("isNaN", function(x) { standardGeneric("isNaN") })
722730
#' @export
723731
setGeneric("last_day", function(x) { standardGeneric("last_day") })
724732

733+
#' @rdname functions
734+
#' @export
735+
setGeneric("least", function(x, ...) { standardGeneric("least") })
736+
725737
#' @rdname functions
726738
#' @export
727739
setGeneric("levenshtein", function(y, x) { standardGeneric("levenshtein") })
728740

741+
#' @rdname functions
742+
#' @export
743+
setGeneric("lit", function(x) { standardGeneric("lit") })
744+
729745
#' @rdname functions
730746
#' @export
731747
setGeneric("lower", function(x) { standardGeneric("lower") })

R/pkg/inst/tests/test_sparkSQL.R

Lines changed: 33 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -408,6 +408,14 @@ test_that("collect() returns a data.frame", {
408408
expect_equal(names(rdf)[1], "age")
409409
expect_equal(nrow(rdf), 3)
410410
expect_equal(ncol(rdf), 2)
411+
412+
# collect() returns data correctly from a DataFrame with 0 row
413+
df0 <- limit(df, 0)
414+
rdf <- collect(df0)
415+
expect_true(is.data.frame(rdf))
416+
expect_equal(names(rdf)[1], "age")
417+
expect_equal(nrow(rdf), 0)
418+
expect_equal(ncol(rdf), 2)
411419
})
412420

413421
test_that("limit() returns DataFrame with the correct number of rows", {
@@ -492,6 +500,18 @@ test_that("head() and first() return the correct data", {
492500

493501
testFirst <- first(df)
494502
expect_equal(nrow(testFirst), 1)
503+
504+
# head() and first() return the correct data on
505+
# a DataFrame with 0 row
506+
df0 <- limit(df, 0)
507+
508+
testHead <- head(df0)
509+
expect_equal(nrow(testHead), 0)
510+
expect_equal(ncol(testHead), 2)
511+
512+
testFirst <- first(df0)
513+
expect_equal(nrow(testFirst), 0)
514+
expect_equal(ncol(testFirst), 2)
495515
})
496516

497517
test_that("distinct() and unique on DataFrames", {
@@ -560,6 +580,11 @@ test_that("select with column", {
560580
df2 <- select(df, df$age)
561581
expect_equal(columns(df2), c("age"))
562582
expect_equal(count(df2), 3)
583+
584+
df3 <- select(df, lit("x"))
585+
expect_equal(columns(df3), c("x"))
586+
expect_equal(count(df3), 3)
587+
expect_equal(collect(select(df3, "x"))[[1, 1]], "x")
563588
})
564589

565590
test_that("selectExpr() on a DataFrame", {
@@ -692,6 +717,14 @@ test_that("string operators", {
692717
expect_equal(count(where(df, startsWith(df$name, "A"))), 1)
693718
expect_equal(first(select(df, substr(df$name, 1, 2)))[[1]], "Mi")
694719
expect_equal(collect(select(df, cast(df$age, "string")))[[2, 1]], "30")
720+
expect_equal(collect(select(df, concat(df$name, lit(":"), df$age)))[[2, 1]], "Andy:30")
721+
})
722+
723+
test_that("greatest() and least() on a DataFrame", {
724+
l <- list(list(a = 1, b = 2), list(a = 3, b = 4))
725+
df <- createDataFrame(sqlContext, l)
726+
expect_equal(collect(select(df, greatest(df$a, df$b)))[, 1], c(2, 4))
727+
expect_equal(collect(select(df, least(df$a, df$b)))[, 1], c(1, 3))
695728
})
696729

697730
test_that("group by", {

conf/log4j.properties.template

Lines changed: 2 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -10,6 +10,8 @@ log4j.logger.org.spark-project.jetty=WARN
1010
log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
1111
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
1212
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
13+
log4j.logger.org.apache.parquet=ERROR
14+
log4j.logger.parquet=ERROR
1315

1416
# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
1517
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL

core/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -266,7 +266,7 @@
266266
<dependency>
267267
<groupId>org.tachyonproject</groupId>
268268
<artifactId>tachyon-client</artifactId>
269-
<version>0.7.0</version>
269+
<version>0.7.1</version>
270270
<exclusions>
271271
<exclusion>
272272
<groupId>org.apache.hadoop</groupId>

core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java

Lines changed: 3 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -92,9 +92,9 @@ public final class BytesToBytesMap {
9292

9393
/**
9494
* The maximum number of keys that BytesToBytesMap supports. The hash table has to be
95-
* power-of-2-sized and its backing Java array can contain at most (1 << 30) elements, since
96-
* that's the largest power-of-2 that's less than Integer.MAX_VALUE. We need two long array
97-
* entries per key, giving us a maximum capacity of (1 << 29).
95+
* power-of-2-sized and its backing Java array can contain at most (1 &lt;&lt; 30) elements,
96+
* since that's the largest power-of-2 that's less than Integer.MAX_VALUE. We need two long array
97+
* entries per key, giving us a maximum capacity of (1 &lt;&lt; 29).
9898
*/
9999
@VisibleForTesting
100100
static final int MAX_CAPACITY = (1 << 29);

core/src/main/scala/org/apache/spark/SparkEnv.scala

Lines changed: 2 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -331,6 +331,8 @@ object SparkEnv extends Logging {
331331
case "netty" =>
332332
new NettyBlockTransferService(conf, securityManager, numUsableCores)
333333
case "nio" =>
334+
logWarning("NIO-based block transfer service is deprecated, " +
335+
"and will be removed in Spark 1.6.0.")
334336
new NioBlockTransferService(conf, securityManager)
335337
}
336338

core/src/main/scala/org/apache/spark/SparkException.scala

Lines changed: 7 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -30,3 +30,10 @@ class SparkException(message: String, cause: Throwable)
3030
*/
3131
private[spark] class SparkDriverExecutionException(cause: Throwable)
3232
extends SparkException("Execution error", cause)
33+
34+
/**
35+
* Exception thrown when the main user code is run as a child process (e.g. pyspark) and we want
36+
* the parent SparkSubmit process to exit with the same exit code.
37+
*/
38+
private[spark] case class SparkUserAppException(exitCode: Int)
39+
extends SparkException(s"User application exited with $exitCode")

0 commit comments

Comments (0)