
Commit b7b9f77

yanboliang authored and shivaram committed
[SPARK-12198][SPARKR] SparkR support read.parquet and deprecate parquetFile
SparkR support ```read.parquet``` and deprecate ```parquetFile```. This change is similar to #10145 for ```jsonFile```.

Author: Yanbo Liang <[email protected]>

Closes #10191 from yanboliang/spark-12198.

(cherry picked from commit eeb5872)
Signed-off-by: Shivaram Venkataraman <[email protected]>
1 parent f939c71 commit b7b9f77

File tree (3 files changed, +22 -6 lines)

R/pkg/NAMESPACE
R/pkg/R/SQLContext.R
R/pkg/inst/tests/testthat/test_sparkSQL.R


R/pkg/NAMESPACE
Lines changed: 1 addition & 0 deletions

@@ -270,6 +270,7 @@ export("as.DataFrame",
         "loadDF",
         "parquetFile",
         "read.df",
+        "read.parquet",
         "sql",
         "table",
         "tableNames",

R/pkg/R/SQLContext.R
Lines changed: 14 additions & 2 deletions

@@ -256,18 +256,30 @@ jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) {
   }
 }
 
-
 #' Create a DataFrame from a Parquet file.
 #'
 #' Loads a Parquet file, returning the result as a DataFrame.
 #'
 #' @param sqlContext SQLContext to use
-#' @param ... Path(s) of parquet file(s) to read.
+#' @param path Path of file to read. A vector of multiple paths is allowed.
 #' @return DataFrame
+#' @rdname read.parquet
+#' @name read.parquet
 #' @export
+read.parquet <- function(sqlContext, path) {
+  # Allow the user to have a more flexible definiton of the text file path
+  paths <- as.list(suppressWarnings(normalizePath(path)))
+  read <- callJMethod(sqlContext, "read")
+  sdf <- callJMethod(read, "parquet", paths)
+  dataFrame(sdf)
+}
 
+#' @rdname read.parquet
+#' @name parquetFile
+#' @export
 # TODO: Implement saveasParquetFile and write examples for both
 parquetFile <- function(sqlContext, ...) {
+  .Deprecated("read.parquet")
   # Allow the user to have a more flexible definiton of the text file path
   paths <- lapply(list(...), function(x) suppressWarnings(normalizePath(x)))
   sdf <- callJMethod(sqlContext, "parquetFile", paths)
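For context, a minimal usage sketch of the API introduced in this hunk. It assumes a Spark 1.6-era SparkR session where `sqlContext` is created explicitly via `sparkRSQL.init`; the Parquet paths are hypothetical:

```r
library(SparkR)

# Hypothetical local session; in Spark 1.6 the SQL context is created explicitly
sc <- sparkR.init(master = "local[2]")
sqlContext <- sparkRSQL.init(sc)

# New API: accepts a single path or a character vector of paths
df  <- read.parquet(sqlContext, "/tmp/people.parquet")
df2 <- read.parquet(sqlContext, c("/tmp/people.parquet", "/tmp/people2.parquet"))

# Deprecated API: still returns a DataFrame, but .Deprecated() now emits a warning
df3 <- suppressWarnings(parquetFile(sqlContext, "/tmp/people.parquet"))

sparkR.stop()
```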

R/pkg/inst/tests/testthat/test_sparkSQL.R
Lines changed: 7 additions & 4 deletions

@@ -1420,22 +1420,25 @@ test_that("mutate(), transform(), rename() and names()", {
   detach(airquality)
 })
 
-test_that("write.df() on DataFrame and works with parquetFile", {
+test_that("write.df() on DataFrame and works with read.parquet", {
   df <- jsonFile(sqlContext, jsonPath)
   write.df(df, parquetPath, "parquet", mode="overwrite")
-  parquetDF <- parquetFile(sqlContext, parquetPath)
+  parquetDF <- read.parquet(sqlContext, parquetPath)
   expect_is(parquetDF, "DataFrame")
   expect_equal(count(df), count(parquetDF))
 })
 
-test_that("parquetFile works with multiple input paths", {
+test_that("read.parquet()/parquetFile() works with multiple input paths", {
   df <- jsonFile(sqlContext, jsonPath)
   write.df(df, parquetPath, "parquet", mode="overwrite")
   parquetPath2 <- tempfile(pattern = "parquetPath2", fileext = ".parquet")
   write.df(df, parquetPath2, "parquet", mode="overwrite")
-  parquetDF <- parquetFile(sqlContext, parquetPath, parquetPath2)
+  parquetDF <- read.parquet(sqlContext, c(parquetPath, parquetPath2))
   expect_is(parquetDF, "DataFrame")
   expect_equal(count(parquetDF), count(df) * 2)
+  parquetDF2 <- suppressWarnings(parquetFile(sqlContext, parquetPath, parquetPath2))
+  expect_is(parquetDF2, "DataFrame")
+  expect_equal(count(parquetDF2), count(df) * 2)
 
   # Test if varargs works with variables
   saveMode <- "overwrite"
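A minimal testthat-style sketch, not part of this commit, that would assert the new deprecation warning directly instead of suppressing it (it reuses the sqlContext, jsonPath, and parquetPath fixtures already set up in this test file):

```r
test_that("parquetFile() is deprecated in favor of read.parquet()", {
  df <- jsonFile(sqlContext, jsonPath)
  write.df(df, parquetPath, "parquet", mode = "overwrite")
  # .Deprecated("read.parquet") inside parquetFile() signals a warning mentioning "deprecated"
  expect_warning(parquetDF <- parquetFile(sqlContext, parquetPath), "deprecated")
  expect_is(parquetDF, "DataFrame")
  expect_equal(count(parquetDF), count(df))
})
```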
