17
17
18
18
package org .apache .spark .sql .parquet
19
19
20
- import _root_ .parquet .filter2 .predicate .{FilterPredicate , Operators }
21
20
import org .apache .hadoop .fs .{FileSystem , Path }
22
21
import org .apache .hadoop .mapreduce .Job
23
22
import org .scalatest .{BeforeAndAfterAll , FunSuiteLike }
23
+ import parquet .filter2 .predicate .{FilterPredicate , Operators }
24
24
import parquet .hadoop .ParquetFileWriter
25
25
import parquet .hadoop .util .ContextUtil
26
+ import parquet .io .api .Binary
26
27
27
28
import org .apache .spark .sql ._
28
29
import org .apache .spark .sql .catalyst .expressions ._
@@ -85,6 +86,7 @@ class ParquetQuerySuite extends QueryTest with FunSuiteLike with BeforeAndAfterA
85
86
TestData // Load test data tables.
86
87
87
88
var testRDD : SchemaRDD = null
89
+ var originalParquetFilterPushdownEnabled = TestSQLContext .parquetFilterPushDown
88
90
89
91
override def beforeAll () {
90
92
ParquetTestData .writeFile()
@@ -109,13 +111,17 @@ class ParquetQuerySuite extends QueryTest with FunSuiteLike with BeforeAndAfterA
109
111
Utils .deleteRecursively(ParquetTestData .testNestedDir3)
110
112
Utils .deleteRecursively(ParquetTestData .testNestedDir4)
111
113
// TODO: should the tables registered by this suite also be unregistered here?
114
+
115
+ setConf(SQLConf .PARQUET_FILTER_PUSHDOWN_ENABLED , originalParquetFilterPushdownEnabled.toString)
112
116
}
113
117
114
118
test(" Read/Write All Types" ) {
115
119
val tempDir = getTempFilePath(" parquetTest" ).getCanonicalPath
116
120
val range = (0 to 255 )
117
- val data = sparkContext.parallelize(range)
118
- .map(x => AllDataTypes (s " $x" , x, x.toLong, x.toFloat, x.toDouble, x.toShort, x.toByte, x % 2 == 0 ))
121
+ val data = sparkContext.parallelize(range).map { x =>
122
+ parquet.AllDataTypes (
123
+ s " $x" , x, x.toLong, x.toFloat, x.toDouble, x.toShort, x.toByte, x % 2 == 0 )
124
+ }
119
125
120
126
data.saveAsParquetFile(tempDir)
121
127
@@ -260,14 +266,15 @@ class ParquetQuerySuite extends QueryTest with FunSuiteLike with BeforeAndAfterA
260
266
test(" Read/Write All Types with non-primitive type" ) {
261
267
val tempDir = getTempFilePath(" parquetTest" ).getCanonicalPath
262
268
val range = (0 to 255 )
263
- val data = sparkContext.parallelize(range)
264
- .map(x => AllDataTypesWithNonPrimitiveType (
269
+ val data = sparkContext.parallelize(range).map { x =>
270
+ parquet. AllDataTypesWithNonPrimitiveType (
265
271
s " $x" , x, x.toLong, x.toFloat, x.toDouble, x.toShort, x.toByte, x % 2 == 0 ,
266
272
(0 until x),
267
273
(0 until x).map(Option (_).filter(_ % 3 == 0 )),
268
274
(0 until x).map(i => i -> i.toLong).toMap,
269
275
(0 until x).map(i => i -> Option (i.toLong)).toMap + (x -> None ),
270
- Data ((0 until x), Nested (x, s " $x" ))))
276
+ parquet.Data ((0 until x), parquet.Nested (x, s " $x" )))
277
+ }
271
278
data.saveAsParquetFile(tempDir)
272
279
273
280
checkAnswer(
@@ -420,7 +427,7 @@ class ParquetQuerySuite extends QueryTest with FunSuiteLike with BeforeAndAfterA
420
427
}
421
428
422
429
test(" save and load case class RDD with nulls as parquet" ) {
423
- val data = NullReflectData (null , null , null , null , null )
430
+ val data = parquet. NullReflectData (null , null , null , null , null )
424
431
val rdd = sparkContext.parallelize(data :: Nil )
425
432
426
433
val file = getTempFilePath(" parquet" )
@@ -435,7 +442,7 @@ class ParquetQuerySuite extends QueryTest with FunSuiteLike with BeforeAndAfterA
435
442
}
436
443
437
444
test(" save and load case class RDD with Nones as parquet" ) {
438
- val data = OptionalReflectData (None , None , None , None , None )
445
+ val data = parquet. OptionalReflectData (None , None , None , None , None )
439
446
val rdd = sparkContext.parallelize(data :: Nil )
440
447
441
448
val file = getTempFilePath(" parquet" )
@@ -938,4 +945,104 @@ class ParquetQuerySuite extends QueryTest with FunSuiteLike with BeforeAndAfterA
938
945
checkAnswer(parquetFile(tempDir), data.toSchemaRDD.collect().toSeq)
939
946
}
940
947
}
948
+
949
/**
 * Asserts that `predicate` is converted by `ParquetFilters.createFilter` into a filter
 * whose runtime class is exactly `filterClass`.
 *
 * Fails the enclosing test when no filter is produced or when the produced filter has a
 * different class. Both failure messages name the offending predicate, so a failure inside
 * a test that checks several predicates can be attributed without a debugger.
 *
 * @param predicate   predicate handed to `ParquetFilters.createFilter`
 * @param filterClass expected runtime class of the resulting filter
 */
def checkFilter(predicate: Predicate, filterClass: Class[_ <: FilterPredicate]): Unit = {
  ParquetFilters.createFilter(predicate) match {
    case Some(filter) =>
      assert(
        filter.getClass == filterClass,
        s"Predicate $predicate produced a ${filter.getClass.getName}, " +
          s"expected a ${filterClass.getName}")

    case None =>
      // Pattern matching replaces the former `isDefined` + `.get` pair.
      fail(s"Predicate $predicate was not converted into a Parquet filter")
  }
}
954
+
955
// `isNull` on each listed column type is expected to yield a Parquet `Eq` filter.
test(" Pushdown IsNull predicate") {
  val expectations = Seq[(Predicate, Class[_ <: FilterPredicate])](
    ('a.int.isNull, classOf[Operators.Eq[Integer]]),
    ('a.long.isNull, classOf[Operators.Eq[java.lang.Long]]),
    ('a.float.isNull, classOf[Operators.Eq[java.lang.Float]]),
    ('a.double.isNull, classOf[Operators.Eq[java.lang.Double]]),
    ('a.string.isNull, classOf[Operators.Eq[Binary]]),
    ('a.binary.isNull, classOf[Operators.Eq[Binary]])
  )

  expectations.foreach { case (predicate, expectedClass) =>
    checkFilter(predicate, expectedClass)
  }
}
963
+
964
// `isNotNull` on each listed column type is expected to yield a Parquet `NotEq` filter.
test(" Pushdown IsNotNull predicate") {
  val expectations = Seq[(Predicate, Class[_ <: FilterPredicate])](
    ('a.int.isNotNull, classOf[Operators.NotEq[Integer]]),
    ('a.long.isNotNull, classOf[Operators.NotEq[java.lang.Long]]),
    ('a.float.isNotNull, classOf[Operators.NotEq[java.lang.Float]]),
    ('a.double.isNotNull, classOf[Operators.NotEq[java.lang.Double]]),
    ('a.string.isNotNull, classOf[Operators.NotEq[Binary]]),
    ('a.binary.isNotNull, classOf[Operators.NotEq[Binary]])
  )

  expectations.foreach { case (predicate, expectedClass) =>
    checkFilter(predicate, expectedClass)
  }
}
972
+
973
// Equality against a literal of each listed type is expected to yield a Parquet `Eq` filter.
test(" Pushdown EqualTo predicate") {
  val expectations = Seq[(Predicate, Class[_ <: FilterPredicate])](
    ('a.int === 0, classOf[Operators.Eq[Integer]]),
    ('a.long === 0.toLong, classOf[Operators.Eq[java.lang.Long]]),
    ('a.float === 0.toFloat, classOf[Operators.Eq[java.lang.Float]]),
    ('a.double === 0.toDouble, classOf[Operators.Eq[java.lang.Double]]),
    ('a.string === " foo", classOf[Operators.Eq[Binary]]),
    ('a.binary === " foo".getBytes, classOf[Operators.Eq[Binary]])
  )

  expectations.foreach { case (predicate, expectedClass) =>
    checkFilter(predicate, expectedClass)
  }
}
981
+
982
// A negated equality on each listed type is expected to yield a Parquet `NotEq` filter.
test(" Pushdown Not(EqualTo) predicate") {
  val expectations = Seq[(Predicate, Class[_ <: FilterPredicate])](
    (!('a.int === 0), classOf[Operators.NotEq[Integer]]),
    (!('a.long === 0.toLong), classOf[Operators.NotEq[java.lang.Long]]),
    (!('a.float === 0.toFloat), classOf[Operators.NotEq[java.lang.Float]]),
    (!('a.double === 0.toDouble), classOf[Operators.NotEq[java.lang.Double]]),
    (!('a.string === " foo"), classOf[Operators.NotEq[Binary]]),
    (!('a.binary === " foo".getBytes), classOf[Operators.NotEq[Binary]])
  )

  expectations.foreach { case (predicate, expectedClass) =>
    checkFilter(predicate, expectedClass)
  }
}
990
+
991
// `<` against a literal of each listed type is expected to yield a Parquet `Lt` filter.
test(" Pushdown LessThan predicate") {
  val expectations = Seq[(Predicate, Class[_ <: FilterPredicate])](
    ('a.int < 0, classOf[Operators.Lt[Integer]]),
    ('a.long < 0.toLong, classOf[Operators.Lt[java.lang.Long]]),
    ('a.float < 0.toFloat, classOf[Operators.Lt[java.lang.Float]]),
    ('a.double < 0.toDouble, classOf[Operators.Lt[java.lang.Double]]),
    ('a.string < " foo", classOf[Operators.Lt[Binary]]),
    ('a.binary < " foo".getBytes, classOf[Operators.Lt[Binary]])
  )

  expectations.foreach { case (predicate, expectedClass) =>
    checkFilter(predicate, expectedClass)
  }
}
999
+
1000
// `<=` against a literal of each listed type is expected to yield a Parquet `LtEq` filter.
test(" Pushdown LessThanOrEqual predicate") {
  val expectations = Seq[(Predicate, Class[_ <: FilterPredicate])](
    ('a.int <= 0, classOf[Operators.LtEq[Integer]]),
    ('a.long <= 0.toLong, classOf[Operators.LtEq[java.lang.Long]]),
    ('a.float <= 0.toFloat, classOf[Operators.LtEq[java.lang.Float]]),
    ('a.double <= 0.toDouble, classOf[Operators.LtEq[java.lang.Double]]),
    ('a.string <= " foo", classOf[Operators.LtEq[Binary]]),
    ('a.binary <= " foo".getBytes, classOf[Operators.LtEq[Binary]])
  )

  expectations.foreach { case (predicate, expectedClass) =>
    checkFilter(predicate, expectedClass)
  }
}
1008
+
1009
// `>` against a literal of each listed type is expected to yield a Parquet `Gt` filter.
test(" Pushdown GreaterThan predicate") {
  val expectations = Seq[(Predicate, Class[_ <: FilterPredicate])](
    ('a.int > 0, classOf[Operators.Gt[Integer]]),
    ('a.long > 0.toLong, classOf[Operators.Gt[java.lang.Long]]),
    ('a.float > 0.toFloat, classOf[Operators.Gt[java.lang.Float]]),
    ('a.double > 0.toDouble, classOf[Operators.Gt[java.lang.Double]]),
    ('a.string > " foo", classOf[Operators.Gt[Binary]]),
    ('a.binary > " foo".getBytes, classOf[Operators.Gt[Binary]])
  )

  expectations.foreach { case (predicate, expectedClass) =>
    checkFilter(predicate, expectedClass)
  }
}
1017
+
1018
// `>=` against a literal of each listed type is expected to yield a Parquet `GtEq` filter.
test(" Pushdown GreaterThanOrEqual predicate") {
  val expectations = Seq[(Predicate, Class[_ <: FilterPredicate])](
    ('a.int >= 0, classOf[Operators.GtEq[Integer]]),
    ('a.long >= 0.toLong, classOf[Operators.GtEq[java.lang.Long]]),
    ('a.float >= 0.toFloat, classOf[Operators.GtEq[java.lang.Float]]),
    ('a.double >= 0.toDouble, classOf[Operators.GtEq[java.lang.Double]]),
    ('a.string >= " foo", classOf[Operators.GtEq[Binary]]),
    ('a.binary >= " foo".getBytes, classOf[Operators.GtEq[Binary]])
  )

  expectations.foreach { case (predicate, expectedClass) =>
    checkFilter(predicate, expectedClass)
  }
}
1026
+
1027
// Every comparison below has a null operand; `ParquetFilters.createFilter` is expected to
// return no filter for any of them.
// NOTE(review): the bare `null` operands presumably reach the DSL operators as a null
// expression reference rather than as `Literal(null)` -- confirm that is the intended input.
test(" Comparison with null should not be pushed down") {
  // Equality and negated equality, with the null operand on either side.
  val equalityChecks = Seq(
    'a.int === null,
    !('a.int === null),
    Literal(null) === 'a.int,
    !(Literal(null) === 'a.int)
  )

  // Ordering comparisons with the attribute on the left.
  val attributeOnLeft = Seq(
    'a.int < null,
    'a.int <= null,
    'a.int > null,
    'a.int >= null
  )

  // Ordering comparisons with the null literal on the left.
  val literalOnLeft = Seq(
    Literal(null) < 'a.int,
    Literal(null) <= 'a.int,
    Literal(null) > 'a.int,
    Literal(null) >= 'a.int
  )

  for (nullComparison <- equalityChecks ++ attributeOnLeft ++ literalOnLeft) {
    assert(ParquetFilters.createFilter(nullComparison).isEmpty)
  }
}
941
1048
}
0 commit comments