Skip to content

Commit 19a8802

Browse files
liancheng authored and marmbrus committed
[SPARK-4493][SQL] Tests for IsNull / IsNotNull in the ParquetFilterSuite
This is a follow-up of #3367 and #3644. At the time #3644 was written, #3367 hadn't been merged yet, so `IsNull` and `IsNotNull` filters were not covered in the first version of `ParquetFilterSuite`. This PR adds corresponding test cases. <!-- Reviewable:start --> [<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/apache/spark/3748) <!-- Reviewable:end --> Author: Cheng Lian <[email protected]> Closes #3748 from liancheng/test-null-filters and squashes the following commits: 1ab943f [Cheng Lian] IsNull and IsNotNull Parquet filter test case for boolean type bcd616b [Cheng Lian] Adds Parquet filter pushedown tests for IsNull and IsNotNull
1 parent 53f0a00 commit 19a8802

File tree

1 file changed

+50
-10
lines changed

1 file changed

+50
-10
lines changed

sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala

Lines changed: 50 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -28,11 +28,14 @@ import org.apache.spark.sql.{QueryTest, SQLConf, SchemaRDD}
2828
/**
2929
* A test suite that tests Parquet filter2 API based filter pushdown optimization.
3030
*
31-
* Notice that `!(a cmp b)` are always transformed to its negated form `a cmp' b` by the
32-
* `BooleanSimplification` optimization rule whenever possible. As a result, predicate `!(a < 1)`
33-
* results a `GtEq` filter predicate rather than a `Not`.
31+
* NOTE:
3432
*
35-
* @todo Add test cases for `IsNull` and `IsNotNull` after merging PR #3367
33+
* 1. `!(a cmp b)` is always transformed to its negated form `a cmp' b` by the
34+
* `BooleanSimplification` optimization rule whenever possible. As a result, predicate `!(a < 1)`
35+
* results in a `GtEq` filter predicate rather than a `Not`.
36+
*
37+
* 2. `Tuple1(Option(x))` is used together with `AnyVal` types like `Int` to ensure the inferred
38+
* data type is nullable.
3639
*/
3740
class ParquetFilterSuite extends QueryTest with ParquetTest {
3841
val sqlContext = TestSQLContext
@@ -85,14 +88,26 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
8588
}
8689

8790
test("filter pushdown - boolean") {
88-
withParquetRDD((true :: false :: Nil).map(Tuple1.apply)) { rdd =>
91+
withParquetRDD((true :: false :: Nil).map(b => Tuple1.apply(Option(b)))) { rdd =>
92+
checkFilterPushdown(rdd, '_1)('_1.isNull, classOf[Eq[java.lang.Boolean]])(Seq.empty[Row])
93+
checkFilterPushdown(rdd, '_1)('_1.isNotNull, classOf[NotEq[java.lang.Boolean]]) {
94+
Seq(Row(true), Row(false))
95+
}
96+
8997
checkFilterPushdown(rdd, '_1)('_1 === true, classOf[Eq[java.lang.Boolean]])(true)
90-
checkFilterPushdown(rdd, '_1)('_1 !== true, classOf[Operators.NotEq[java.lang.Boolean]])(false)
98+
checkFilterPushdown(rdd, '_1)('_1 !== true, classOf[Operators.NotEq[java.lang.Boolean]]) {
99+
false
100+
}
91101
}
92102
}
93103

94104
test("filter pushdown - integer") {
95-
withParquetRDD((1 to 4).map(Tuple1.apply)) { rdd =>
105+
withParquetRDD((1 to 4).map(i => Tuple1(Option(i)))) { rdd =>
106+
checkFilterPushdown(rdd, '_1)('_1.isNull, classOf[Eq[Integer]])(Seq.empty[Row])
107+
checkFilterPushdown(rdd, '_1)('_1.isNotNull, classOf[NotEq[Integer]]) {
108+
(1 to 4).map(Row.apply(_))
109+
}
110+
96111
checkFilterPushdown(rdd, '_1)('_1 === 1, classOf[Eq[Integer]])(1)
97112
checkFilterPushdown(rdd, '_1)('_1 !== 1, classOf[Operators.NotEq[Integer]]) {
98113
(2 to 4).map(Row.apply(_))
@@ -118,7 +133,12 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
118133
}
119134

120135
test("filter pushdown - long") {
121-
withParquetRDD((1 to 4).map(i => Tuple1(i.toLong))) { rdd =>
136+
withParquetRDD((1 to 4).map(i => Tuple1(Option(i.toLong)))) { rdd =>
137+
checkFilterPushdown(rdd, '_1)('_1.isNull, classOf[Eq[java.lang.Long]])(Seq.empty[Row])
138+
checkFilterPushdown(rdd, '_1)('_1.isNotNull, classOf[NotEq[java.lang.Long]]) {
139+
(1 to 4).map(Row.apply(_))
140+
}
141+
122142
checkFilterPushdown(rdd, '_1)('_1 === 1, classOf[Eq[java.lang.Long]])(1)
123143
checkFilterPushdown(rdd, '_1)('_1 !== 1, classOf[Operators.NotEq[java.lang.Long]]) {
124144
(2 to 4).map(Row.apply(_))
@@ -144,7 +164,12 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
144164
}
145165

146166
test("filter pushdown - float") {
147-
withParquetRDD((1 to 4).map(i => Tuple1(i.toFloat))) { rdd =>
167+
withParquetRDD((1 to 4).map(i => Tuple1(Option(i.toFloat)))) { rdd =>
168+
checkFilterPushdown(rdd, '_1)('_1.isNull, classOf[Eq[java.lang.Float]])(Seq.empty[Row])
169+
checkFilterPushdown(rdd, '_1)('_1.isNotNull, classOf[NotEq[java.lang.Float]]) {
170+
(1 to 4).map(Row.apply(_))
171+
}
172+
148173
checkFilterPushdown(rdd, '_1)('_1 === 1, classOf[Eq[java.lang.Float]])(1)
149174
checkFilterPushdown(rdd, '_1)('_1 !== 1, classOf[Operators.NotEq[java.lang.Float]]) {
150175
(2 to 4).map(Row.apply(_))
@@ -170,7 +195,12 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
170195
}
171196

172197
test("filter pushdown - double") {
173-
withParquetRDD((1 to 4).map(i => Tuple1(i.toDouble))) { rdd =>
198+
withParquetRDD((1 to 4).map(i => Tuple1(Option(i.toDouble)))) { rdd =>
199+
checkFilterPushdown(rdd, '_1)('_1.isNull, classOf[Eq[java.lang.Double]])(Seq.empty[Row])
200+
checkFilterPushdown(rdd, '_1)('_1.isNotNull, classOf[NotEq[java.lang.Double]]) {
201+
(1 to 4).map(Row.apply(_))
202+
}
203+
174204
checkFilterPushdown(rdd, '_1)('_1 === 1, classOf[Eq[java.lang.Double]])(1)
175205
checkFilterPushdown(rdd, '_1)('_1 !== 1, classOf[Operators.NotEq[java.lang.Double]]) {
176206
(2 to 4).map(Row.apply(_))
@@ -197,6 +227,11 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
197227

198228
test("filter pushdown - string") {
199229
withParquetRDD((1 to 4).map(i => Tuple1(i.toString))) { rdd =>
230+
checkFilterPushdown(rdd, '_1)('_1.isNull, classOf[Eq[java.lang.String]])(Seq.empty[Row])
231+
checkFilterPushdown(rdd, '_1)('_1.isNotNull, classOf[NotEq[java.lang.String]]) {
232+
(1 to 4).map(i => Row.apply(i.toString))
233+
}
234+
200235
checkFilterPushdown(rdd, '_1)('_1 === "1", classOf[Eq[String]])("1")
201236
checkFilterPushdown(rdd, '_1)('_1 !== "1", classOf[Operators.NotEq[String]]) {
202237
(2 to 4).map(i => Row.apply(i.toString))
@@ -227,6 +262,11 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
227262
}
228263

229264
withParquetRDD((1 to 4).map(i => Tuple1(i.b))) { rdd =>
265+
checkBinaryFilterPushdown(rdd, '_1)('_1.isNull, classOf[Eq[java.lang.String]])(Seq.empty[Row])
266+
checkBinaryFilterPushdown(rdd, '_1)('_1.isNotNull, classOf[NotEq[java.lang.String]]) {
267+
(1 to 4).map(i => Row.apply(i.b)).toSeq
268+
}
269+
230270
checkBinaryFilterPushdown(rdd, '_1)('_1 === 1.b, classOf[Eq[Array[Byte]]])(1.b)
231271
checkBinaryFilterPushdown(rdd, '_1)('_1 !== 1.b, classOf[Operators.NotEq[Array[Byte]]]) {
232272
(2 to 4).map(i => Row.apply(i.b)).toSeq

0 commit comments

Comments
 (0)