@@ -28,11 +28,14 @@ import org.apache.spark.sql.{QueryTest, SQLConf, SchemaRDD}
28
28
/**
29
29
* A test suite that tests Parquet filter2 API based filter pushdown optimization.
30
30
*
31
- * Notice that `!(a cmp b)` are always transformed to its negated form `a cmp' b` by the
32
- * `BooleanSimplification` optimization rule whenever possible. As a result, predicate `!(a < 1)`
33
- * results a `GtEq` filter predicate rather than a `Not`.
31
+ * NOTE:
34
32
*
35
- * @todo Add test cases for `IsNull` and `IsNotNull` after merging PR #3367
33
+ * 1. `!(a cmp b)` is always transformed to its negated form `a cmp' b` by the
34
+ * `BooleanSimplification` optimization rule whenever possible. As a result, predicate `!(a < 1)`
35
+ * results in a `GtEq` filter predicate rather than a `Not`.
36
+ *
37
+ * 2. `Tuple1(Option(x))` is used together with `AnyVal` types like `Int` to ensure the inferred
38
+ * data type is nullable (a bare `AnyVal` field such as `Int` is inferred as non-nullable).
36
39
*/
37
40
class ParquetFilterSuite extends QueryTest with ParquetTest {
38
41
val sqlContext = TestSQLContext
@@ -85,14 +88,26 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
85
88
}
86
89
87
90
test(" filter pushdown - boolean" ) {
88
- withParquetRDD((true :: false :: Nil ).map(Tuple1 .apply)) { rdd =>
91
+ withParquetRDD((true :: false :: Nil ).map(b => Tuple1 .apply(Option (b)))) { rdd =>
92
+ checkFilterPushdown(rdd, ' _1 )(' _1 .isNull, classOf [Eq [java.lang.Boolean ]])(Seq .empty[Row ])
93
+ checkFilterPushdown(rdd, ' _1 )(' _1 .isNotNull, classOf [NotEq [java.lang.Boolean ]]) {
94
+ Seq (Row (true ), Row (false ))
95
+ }
96
+
89
97
checkFilterPushdown(rdd, ' _1 )(' _1 === true , classOf [Eq [java.lang.Boolean ]])(true )
90
- checkFilterPushdown(rdd, ' _1 )(' _1 !== true , classOf [Operators .NotEq [java.lang.Boolean ]])(false )
98
+ checkFilterPushdown(rdd, ' _1 )(' _1 !== true , classOf [Operators .NotEq [java.lang.Boolean ]]) {
99
+ false
100
+ }
91
101
}
92
102
}
93
103
94
104
test(" filter pushdown - integer" ) {
95
- withParquetRDD((1 to 4 ).map(Tuple1 .apply)) { rdd =>
105
+ withParquetRDD((1 to 4 ).map(i => Tuple1 (Option (i)))) { rdd =>
106
+ checkFilterPushdown(rdd, ' _1 )(' _1 .isNull, classOf [Eq [Integer ]])(Seq .empty[Row ])
107
+ checkFilterPushdown(rdd, ' _1 )(' _1 .isNotNull, classOf [NotEq [Integer ]]) {
108
+ (1 to 4 ).map(Row .apply(_))
109
+ }
110
+
96
111
checkFilterPushdown(rdd, ' _1 )(' _1 === 1 , classOf [Eq [Integer ]])(1 )
97
112
checkFilterPushdown(rdd, ' _1 )(' _1 !== 1 , classOf [Operators .NotEq [Integer ]]) {
98
113
(2 to 4 ).map(Row .apply(_))
@@ -118,7 +133,12 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
118
133
}
119
134
120
135
test(" filter pushdown - long" ) {
121
- withParquetRDD((1 to 4 ).map(i => Tuple1 (i.toLong))) { rdd =>
136
+ withParquetRDD((1 to 4 ).map(i => Tuple1 (Option (i.toLong)))) { rdd =>
137
+ checkFilterPushdown(rdd, ' _1 )(' _1 .isNull, classOf [Eq [java.lang.Long ]])(Seq .empty[Row ])
138
+ checkFilterPushdown(rdd, ' _1 )(' _1 .isNotNull, classOf [NotEq [java.lang.Long ]]) {
139
+ (1 to 4 ).map(Row .apply(_))
140
+ }
141
+
122
142
checkFilterPushdown(rdd, ' _1 )(' _1 === 1 , classOf [Eq [java.lang.Long ]])(1 )
123
143
checkFilterPushdown(rdd, ' _1 )(' _1 !== 1 , classOf [Operators .NotEq [java.lang.Long ]]) {
124
144
(2 to 4 ).map(Row .apply(_))
@@ -144,7 +164,12 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
144
164
}
145
165
146
166
test(" filter pushdown - float" ) {
147
- withParquetRDD((1 to 4 ).map(i => Tuple1 (i.toFloat))) { rdd =>
167
+ withParquetRDD((1 to 4 ).map(i => Tuple1 (Option (i.toFloat)))) { rdd =>
168
+ checkFilterPushdown(rdd, ' _1 )(' _1 .isNull, classOf [Eq [java.lang.Float ]])(Seq .empty[Row ])
169
+ checkFilterPushdown(rdd, ' _1 )(' _1 .isNotNull, classOf [NotEq [java.lang.Float ]]) {
170
+ (1 to 4 ).map(Row .apply(_))
171
+ }
172
+
148
173
checkFilterPushdown(rdd, ' _1 )(' _1 === 1 , classOf [Eq [java.lang.Float ]])(1 )
149
174
checkFilterPushdown(rdd, ' _1 )(' _1 !== 1 , classOf [Operators .NotEq [java.lang.Float ]]) {
150
175
(2 to 4 ).map(Row .apply(_))
@@ -170,7 +195,12 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
170
195
}
171
196
172
197
test(" filter pushdown - double" ) {
173
- withParquetRDD((1 to 4 ).map(i => Tuple1 (i.toDouble))) { rdd =>
198
+ withParquetRDD((1 to 4 ).map(i => Tuple1 (Option (i.toDouble)))) { rdd =>
199
+ checkFilterPushdown(rdd, ' _1 )(' _1 .isNull, classOf [Eq [java.lang.Double ]])(Seq .empty[Row ])
200
+ checkFilterPushdown(rdd, ' _1 )(' _1 .isNotNull, classOf [NotEq [java.lang.Double ]]) {
201
+ (1 to 4 ).map(Row .apply(_))
202
+ }
203
+
174
204
checkFilterPushdown(rdd, ' _1 )(' _1 === 1 , classOf [Eq [java.lang.Double ]])(1 )
175
205
checkFilterPushdown(rdd, ' _1 )(' _1 !== 1 , classOf [Operators .NotEq [java.lang.Double ]]) {
176
206
(2 to 4 ).map(Row .apply(_))
@@ -197,6 +227,11 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
197
227
198
228
test(" filter pushdown - string" ) {
199
229
withParquetRDD((1 to 4 ).map(i => Tuple1 (i.toString))) { rdd =>
230
+ checkFilterPushdown(rdd, ' _1 )(' _1 .isNull, classOf [Eq [java.lang.String ]])(Seq .empty[Row ])
231
+ checkFilterPushdown(rdd, ' _1 )(' _1 .isNotNull, classOf [NotEq [java.lang.String ]]) {
232
+ (1 to 4 ).map(i => Row .apply(i.toString))
233
+ }
234
+
200
235
checkFilterPushdown(rdd, ' _1 )(' _1 === " 1" , classOf [Eq [String ]])(" 1" )
201
236
checkFilterPushdown(rdd, ' _1 )(' _1 !== " 1" , classOf [Operators .NotEq [String ]]) {
202
237
(2 to 4 ).map(i => Row .apply(i.toString))
@@ -227,6 +262,11 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
227
262
}
228
263
229
264
withParquetRDD((1 to 4 ).map(i => Tuple1 (i.b))) { rdd =>
265
+ checkBinaryFilterPushdown(rdd, ' _1 )(' _1 .isNull, classOf [Eq [java.lang.String ]])(Seq .empty[Row ])
266
+ checkBinaryFilterPushdown(rdd, ' _1 )(' _1 .isNotNull, classOf [NotEq [java.lang.String ]]) {
267
+ (1 to 4 ).map(i => Row .apply(i.b)).toSeq
268
+ }
269
+
230
270
checkBinaryFilterPushdown(rdd, ' _1 )(' _1 === 1 .b, classOf [Eq [Array [Byte ]]])(1 .b)
231
271
checkBinaryFilterPushdown(rdd, ' _1 )(' _1 !== 1 .b, classOf [Operators .NotEq [Array [Byte ]]]) {
232
272
(2 to 4 ).map(i => Row .apply(i.b)).toSeq
0 commit comments