Skip to content

Commit 6f22d26

Browse files
authored
Merge pull request apache#176 from palantir/rk/merge-upstream
2 parents 1e7eac5 + f53fdff commit 6f22d26

File tree

1,109 files changed

+41438
-12430
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,109 files changed

+41438
-12430
lines changed

LICENSE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,3 +297,4 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
297297
(MIT License) RowsGroup (http://datatables.net/license/mit)
298298
(MIT License) jsonFormatter (http://www.jqueryscript.net/other/jQuery-Plugin-For-Pretty-JSON-Formatting-jsonFormatter.html)
299299
(MIT License) modernizr (https://github.com/Modernizr/Modernizr/blob/master/LICENSE)
300+
(MIT License) machinist (https://github.com/typelevel/machinist)

R/check-cran.sh

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,18 @@
2020
set -o pipefail
2121
set -e
2222

23-
FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)"
24-
pushd $FWDIR > /dev/null
23+
FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
24+
pushd "$FWDIR" > /dev/null
2525

26-
. $FWDIR/find-r.sh
26+
. "$FWDIR/find-r.sh"
2727

2828
# Install the package (this is required for code in vignettes to run when building it later)
2929
# Build the latest docs, but not the vignettes, which are built with the package next
30-
. $FWDIR/install-dev.sh
30+
. "$FWDIR/install-dev.sh"
3131

3232
# Build source package with vignettes
3333
SPARK_HOME="$(cd "${FWDIR}"/..; pwd)"
34-
. "${SPARK_HOME}"/bin/load-spark-env.sh
34+
. "${SPARK_HOME}/bin/load-spark-env.sh"
3535
if [ -f "${SPARK_HOME}/RELEASE" ]; then
3636
SPARK_JARS_DIR="${SPARK_HOME}/jars"
3737
else
@@ -40,16 +40,16 @@ fi
4040

4141
if [ -d "$SPARK_JARS_DIR" ]; then
4242
# Build a zip file containing the source package with vignettes
43-
SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/"R CMD build $FWDIR/pkg
43+
SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/R" CMD build "$FWDIR/pkg"
4444

4545
find pkg/vignettes/. -not -name '.' -not -name '*.Rmd' -not -name '*.md' -not -name '*.pdf' -not -name '*.html' -delete
4646
else
47-
echo "Error Spark JARs not found in $SPARK_HOME"
47+
echo "Error Spark JARs not found in '$SPARK_HOME'"
4848
exit 1
4949
fi
5050

5151
# Run check as-cran.
52-
VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'`
52+
VERSION=`grep Version "$FWDIR/pkg/DESCRIPTION" | awk '{print $NF}'`
5353

5454
CRAN_CHECK_OPTIONS="--as-cran"
5555

@@ -67,10 +67,10 @@ echo "Running CRAN check with $CRAN_CHECK_OPTIONS options"
6767

6868
if [ -n "$NO_TESTS" ] && [ -n "$NO_MANUAL" ]
6969
then
70-
"$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz
70+
"$R_SCRIPT_PATH/R" CMD check $CRAN_CHECK_OPTIONS "SparkR_$VERSION.tar.gz"
7171
else
7272
# This will run tests and/or build vignettes, and require SPARK_HOME
73-
SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz
73+
SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/R" CMD check $CRAN_CHECK_OPTIONS "SparkR_$VERSION.tar.gz"
7474
fi
7575

7676
popd > /dev/null

R/create-docs.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,23 +33,23 @@ export FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
3333
export SPARK_HOME="$(cd "`dirname "${BASH_SOURCE[0]}"`"/..; pwd)"
3434

3535
# Required for setting SPARK_SCALA_VERSION
36-
. "${SPARK_HOME}"/bin/load-spark-env.sh
36+
. "${SPARK_HOME}/bin/load-spark-env.sh"
3737

3838
echo "Using Scala $SPARK_SCALA_VERSION"
3939

40-
pushd $FWDIR > /dev/null
41-
. $FWDIR/find-r.sh
40+
pushd "$FWDIR" > /dev/null
41+
. "$FWDIR/find-r.sh"
4242

4343
# Install the package (this will also generate the Rd files)
44-
. $FWDIR/install-dev.sh
44+
. "$FWDIR/install-dev.sh"
4545

4646
# Now create HTML files
4747

4848
# knit_rd puts html in current working directory
4949
mkdir -p pkg/html
5050
pushd pkg/html
5151

52-
"$R_SCRIPT_PATH/"Rscript -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knitr); knit_rd("SparkR", links = tools::findHTMLlinks(paste(libDir, "SparkR", sep="/")))'
52+
"$R_SCRIPT_PATH/Rscript" -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knitr); knit_rd("SparkR", links = tools::findHTMLlinks(paste(libDir, "SparkR", sep="/")))'
5353

5454
popd
5555

R/create-rd.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@
2929
set -o pipefail
3030
set -e
3131

32-
FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)"
33-
pushd $FWDIR > /dev/null
34-
. $FWDIR/find-r.sh
32+
FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
33+
pushd "$FWDIR" > /dev/null
34+
. "$FWDIR/find-r.sh"
3535

3636
# Generate Rd files if devtools is installed
37-
"$R_SCRIPT_PATH/"Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'
37+
"$R_SCRIPT_PATH/Rscript" -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'

R/install-dev.sh

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,21 +29,21 @@
2929
set -o pipefail
3030
set -e
3131

32-
FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)"
32+
FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
3333
LIB_DIR="$FWDIR/lib"
3434

35-
mkdir -p $LIB_DIR
35+
mkdir -p "$LIB_DIR"
3636

37-
pushd $FWDIR > /dev/null
38-
. $FWDIR/find-r.sh
37+
pushd "$FWDIR" > /dev/null
38+
. "$FWDIR/find-r.sh"
3939

40-
. $FWDIR/create-rd.sh
40+
. "$FWDIR/create-rd.sh"
4141

4242
# Install SparkR to $LIB_DIR
43-
"$R_SCRIPT_PATH/"R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/
43+
"$R_SCRIPT_PATH/R" CMD INSTALL --library="$LIB_DIR" "$FWDIR/pkg/"
4444

4545
# Zip the SparkR package so that it can be distributed to worker nodes on YARN
46-
cd $LIB_DIR
46+
cd "$LIB_DIR"
4747
jar cfM "$LIB_DIR/sparkr.zip" SparkR
4848

4949
popd > /dev/null

R/install-source-package.sh

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,28 +29,28 @@
2929
set -o pipefail
3030
set -e
3131

32-
FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)"
33-
pushd $FWDIR > /dev/null
34-
. $FWDIR/find-r.sh
32+
FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
33+
pushd "$FWDIR" > /dev/null
34+
. "$FWDIR/find-r.sh"
3535

3636
if [ -z "$VERSION" ]; then
37-
VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'`
37+
VERSION=`grep Version "$FWDIR/pkg/DESCRIPTION" | awk '{print $NF}'`
3838
fi
3939

40-
if [ ! -f "$FWDIR"/SparkR_"$VERSION".tar.gz ]; then
41-
echo -e "R source package file $FWDIR/SparkR_$VERSION.tar.gz is not found."
40+
if [ ! -f "$FWDIR/SparkR_$VERSION.tar.gz" ]; then
41+
echo -e "R source package file '$FWDIR/SparkR_$VERSION.tar.gz' is not found."
4242
echo -e "Please build R source package with check-cran.sh"
4343
exit -1;
4444
fi
4545

4646
echo "Removing lib path and installing from source package"
4747
LIB_DIR="$FWDIR/lib"
48-
rm -rf $LIB_DIR
49-
mkdir -p $LIB_DIR
50-
"$R_SCRIPT_PATH/"R CMD INSTALL SparkR_"$VERSION".tar.gz --library=$LIB_DIR
48+
rm -rf "$LIB_DIR"
49+
mkdir -p "$LIB_DIR"
50+
"$R_SCRIPT_PATH/R" CMD INSTALL "SparkR_$VERSION.tar.gz" --library="$LIB_DIR"
5151

5252
# Zip the SparkR package so that it can be distributed to worker nodes on YARN
53-
pushd $LIB_DIR > /dev/null
53+
pushd "$LIB_DIR" > /dev/null
5454
jar cfM "$LIB_DIR/sparkr.zip" SparkR
5555
popd > /dev/null
5656

R/pkg/.lintr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
linters: with_defaults(line_length_linter(100), camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE))
1+
linters: with_defaults(line_length_linter(100), multiple_dots_linter = NULL, camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE))
22
exclusions: list("inst/profile/general.R" = 1, "inst/profile/shell.R")

R/pkg/DESCRIPTION

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ Collate:
3535
'WindowSpec.R'
3636
'backend.R'
3737
'broadcast.R'
38+
'catalog.R'
3839
'client.R'
3940
'context.R'
4041
'deserialize.R'
@@ -43,6 +44,7 @@ Collate:
4344
'jvm.R'
4445
'mllib_classification.R'
4546
'mllib_clustering.R'
47+
'mllib_fpm.R'
4648
'mllib_recommendation.R'
4749
'mllib_regression.R'
4850
'mllib_stat.R'
@@ -51,6 +53,7 @@ Collate:
5153
'serialize.R'
5254
'sparkR.R'
5355
'stats.R'
56+
'streaming.R'
5457
'types.R'
5558
'utils.R'
5659
'window.R'

R/pkg/NAMESPACE

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,10 @@ exportMethods("glm",
6666
"spark.randomForest",
6767
"spark.gbt",
6868
"spark.bisectingKmeans",
69-
"spark.svmLinear")
69+
"spark.svmLinear",
70+
"spark.fpGrowth",
71+
"spark.freqItemsets",
72+
"spark.associationRules")
7073

7174
# Job group lifecycle management methods
7275
export("setJobGroup",
@@ -82,6 +85,7 @@ exportMethods("arrange",
8285
"as.data.frame",
8386
"attach",
8487
"cache",
88+
"checkpoint",
8589
"coalesce",
8690
"collect",
8791
"colnames",
@@ -97,6 +101,7 @@ exportMethods("arrange",
97101
"createOrReplaceTempView",
98102
"crossJoin",
99103
"crosstab",
104+
"cube",
100105
"dapply",
101106
"dapplyCollect",
102107
"describe",
@@ -118,9 +123,11 @@ exportMethods("arrange",
118123
"group_by",
119124
"groupBy",
120125
"head",
126+
"hint",
121127
"insertInto",
122128
"intersect",
123129
"isLocal",
130+
"isStreaming",
124131
"join",
125132
"limit",
126133
"merge",
@@ -138,6 +145,7 @@ exportMethods("arrange",
138145
"registerTempTable",
139146
"rename",
140147
"repartition",
148+
"rollup",
141149
"sample",
142150
"sample_frac",
143151
"sampleBy",
@@ -169,12 +177,14 @@ exportMethods("arrange",
169177
"write.json",
170178
"write.orc",
171179
"write.parquet",
180+
"write.stream",
172181
"write.text",
173182
"write.ml")
174183

175184
exportClasses("Column")
176185

177-
exportMethods("%in%",
186+
exportMethods("%<=>%",
187+
"%in%",
178188
"abs",
179189
"acos",
180190
"add_months",
@@ -197,6 +207,8 @@ exportMethods("%in%",
197207
"cbrt",
198208
"ceil",
199209
"ceiling",
210+
"collect_list",
211+
"collect_set",
200212
"column",
201213
"concat",
202214
"concat_ws",
@@ -207,6 +219,8 @@ exportMethods("%in%",
207219
"count",
208220
"countDistinct",
209221
"crc32",
222+
"create_array",
223+
"create_map",
210224
"hash",
211225
"cume_dist",
212226
"date_add",
@@ -222,6 +236,7 @@ exportMethods("%in%",
222236
"endsWith",
223237
"exp",
224238
"explode",
239+
"explode_outer",
225240
"expm1",
226241
"expr",
227242
"factorial",
@@ -235,12 +250,15 @@ exportMethods("%in%",
235250
"getField",
236251
"getItem",
237252
"greatest",
253+
"grouping_bit",
254+
"grouping_id",
238255
"hex",
239256
"histogram",
240257
"hour",
241258
"hypot",
242259
"ifelse",
243260
"initcap",
261+
"input_file_name",
244262
"instr",
245263
"isNaN",
246264
"isNotNull",
@@ -278,18 +296,21 @@ exportMethods("%in%",
278296
"nanvl",
279297
"negate",
280298
"next_day",
299+
"not",
281300
"ntile",
282301
"otherwise",
283302
"over",
284303
"percent_rank",
285304
"pmod",
286305
"posexplode",
306+
"posexplode_outer",
287307
"quarter",
288308
"rand",
289309
"randn",
290310
"rank",
291311
"regexp_extract",
292312
"regexp_replace",
313+
"repeat_string",
293314
"reverse",
294315
"rint",
295316
"rlike",
@@ -313,6 +334,7 @@ exportMethods("%in%",
313334
"sort_array",
314335
"soundex",
315336
"spark_partition_id",
337+
"split_string",
316338
"stddev",
317339
"stddev_pop",
318340
"stddev_samp",
@@ -355,17 +377,29 @@ export("as.DataFrame",
355377
"clearCache",
356378
"createDataFrame",
357379
"createExternalTable",
380+
"createTable",
381+
"currentDatabase",
358382
"dropTempTable",
359383
"dropTempView",
360384
"jsonFile",
385+
"listColumns",
386+
"listDatabases",
387+
"listFunctions",
388+
"listTables",
361389
"loadDF",
362390
"parquetFile",
363391
"read.df",
364392
"read.jdbc",
365393
"read.json",
366394
"read.orc",
367395
"read.parquet",
396+
"read.stream",
368397
"read.text",
398+
"recoverPartitions",
399+
"refreshByPath",
400+
"refreshTable",
401+
"setCheckpointDir",
402+
"setCurrentDatabase",
369403
"spark.lapply",
370404
"spark.addFile",
371405
"spark.getSparkFilesRootDirectory",
@@ -402,6 +436,16 @@ export("partitionBy",
402436
export("windowPartitionBy",
403437
"windowOrderBy")
404438

439+
exportClasses("StreamingQuery")
440+
441+
export("awaitTermination",
442+
"isActive",
443+
"lastProgress",
444+
"queryName",
445+
"status",
446+
"stopQuery")
447+
448+
405449
S3method(print, jobj)
406450
S3method(print, structField)
407451
S3method(print, structType)

0 commit comments

Comments
 (0)