Skip to content

Commit a85f24a

Browse files
sarutakmarmbrus
authored andcommitted
[SPARK-3831] [SQL] Filter rule Improvement and bool expression optimization.
If we write the filter which is always FALSE like SELECT * from person WHERE FALSE; 200 tasks will run. I think, 1 task is enough. And current optimizer cannot optimize the case NOT is duplicated like SELECT * from person WHERE NOT ( NOT (age > 30)); The filter rule above should be simplified Author: Kousuke Saruta <[email protected]> Closes #2692 from sarutak/SPARK-3831 and squashes the following commits: 25f3e20 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into SPARK-3831 23c750c [Kousuke Saruta] Improved unsupported predicate test case a11b9f3 [Kousuke Saruta] Modified NOT predicate test case in PartitionBatchPruningSuite 8ea872b [Kousuke Saruta] Fixed the number of tasks when the data of LocalRelation is empty.
1 parent add174a commit a85f24a

File tree

3 files changed

+16
-2
lines changed

3 files changed

+16
-2
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,18 @@ object BooleanSimplification extends Rule[LogicalPlan] {
299299
case (_, _) => or
300300
}
301301

302+
case not @ Not(exp) =>
303+
exp match {
304+
case Literal(true, BooleanType) => Literal(false)
305+
case Literal(false, BooleanType) => Literal(true)
306+
case GreaterThan(l, r) => LessThanOrEqual(l, r)
307+
case GreaterThanOrEqual(l, r) => LessThan(l, r)
308+
case LessThan(l, r) => GreaterThanOrEqual(l, r)
309+
case LessThanOrEqual(l, r) => GreaterThan(l, r)
310+
case Not(e) => e
311+
case _ => not
312+
}
313+
302314
// Turn "if (true) a else b" into "a", and if (false) a else b" into "b".
303315
case e @ If(Literal(v, _), trueValue, falseValue) => if (v == true) trueValue else falseValue
304316
}

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,9 +274,10 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
274274
execution.Sample(fraction, withReplacement, seed, planLater(child)) :: Nil
275275
case SparkLogicalPlan(alreadyPlanned) => alreadyPlanned :: Nil
276276
case logical.LocalRelation(output, data) =>
277+
val nPartitions = if (data.isEmpty) 1 else numPartitions
277278
PhysicalRDD(
278279
output,
279-
RDDConversions.productToRowRdd(sparkContext.parallelize(data, numPartitions))) :: Nil
280+
RDDConversions.productToRowRdd(sparkContext.parallelize(data, nPartitions))) :: Nil
280281
case logical.Limit(IntegerLiteral(limit), child) =>
281282
execution.Limit(limit, planLater(child)) :: Nil
282283
case Unions(unionChildren) =>

sql/core/src/test/scala/org/apache/spark/sql/columnar/PartitionBatchPruningSuite.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,11 @@ class PartitionBatchPruningSuite extends FunSuite with BeforeAndAfterAll with Be
6767
checkBatchPruning("i > 8 AND i <= 21", 9 to 21, 2, 3)
6868
checkBatchPruning("i < 2 OR i > 99", Seq(1, 100), 2, 2)
6969
checkBatchPruning("i < 2 OR (i > 78 AND i < 92)", Seq(1) ++ (79 to 91), 3, 4)
70+
checkBatchPruning("NOT (i < 88)", 88 to 100, 1, 2)
7071

7172
// With unsupported predicate
7273
checkBatchPruning("i < 12 AND i IS NOT NULL", 1 to 11, 1, 2)
73-
checkBatchPruning("NOT (i < 88)", 88 to 100, 5, 10)
74+
checkBatchPruning(s"NOT (i in (${(1 to 30).mkString(",")}))", 31 to 100, 5, 10)
7475

7576
def checkBatchPruning(
7677
filter: String,

0 commit comments

Comments
 (0)