File tree Expand file tree Collapse file tree 3 files changed +12
-4
lines changed
sql/core/src/main/scala/org/apache/spark/sql/parquet Expand file tree Collapse file tree 3 files changed +12
-4
lines changed Original file line number Diff line number Diff line change @@ -31,6 +31,10 @@ import org.apache.spark.sql.execution.SparkSqlSerializer
31
31
32
32
object ParquetFilters {
33
33
val PARQUET_FILTER_DATA = " org.apache.spark.sql.parquet.row.filter"
34
+ // set this to false if pushdown should be disabled
35
+ // Note: this key carries no "spark.hadoop." prefix; it is read directly from
36
+ // SparkConf, and pushdown stays enabled when the key is not set
37
+ val PARQUET_FILTER_PUSHDOWN_ENABLED = " org.apache.spark.sql.parquet.filter.pushdown"
34
38
35
39
def createFilter (filterExpressions : Seq [Expression ]): UnboundRecordFilter = {
36
40
def createEqualityFilter (name : String , literal : Literal ) = literal.dataType match {
Original file line number Diff line number Diff line change @@ -71,10 +71,13 @@ case class ParquetTableScan(
71
71
ParquetTypesConverter .convertFromAttributes(output).toString)
72
72
73
73
// Store record filtering predicate in `Configuration`
74
- // Note: the input format ignores all predicates that cannot be expressed
74
+ // Note 1: the input format ignores all predicates that cannot be expressed
75
75
// as simple column predicate filters in Parquet. Here we just record
76
76
// the whole pruning predicate.
77
- if (columnPruningPred.isDefined) {
77
+ // Note 2: you can disable filter predicate pushdown by setting
78
+ // "org.apache.spark.sql.parquet.filter.pushdown" to false inside SparkConf.
79
+ if (columnPruningPred.isDefined &&
80
+ sc.conf.getBoolean(ParquetFilters .PARQUET_FILTER_PUSHDOWN_ENABLED , true )) {
78
81
ParquetFilters .serializeFilterExpressions(columnPruningPred.get, conf)
79
82
}
80
83
Original file line number Diff line number Diff line change @@ -130,14 +130,15 @@ private[sql] object ParquetTestData {
130
130
writer.close()
131
131
}
132
132
133
- def writeFilterFile () = {
133
+ def writeFilterFile (records : Int = 200 ) = {
134
+ // for microbenchmark use: records = 300000000
134
135
testFilterDir.delete
135
136
val path : Path = new Path (new Path (testFilterDir.toURI), new Path (" part-r-0.parquet" ))
136
137
val schema : MessageType = MessageTypeParser .parseMessageType(testFilterSchema)
137
138
val writeSupport = new TestGroupWriteSupport (schema)
138
139
val writer = new ParquetWriter [Group ](path, writeSupport)
139
140
140
- for (i <- 0 to 200 ) {
141
+ for (i <- 0 to records ) {
141
142
val record = new SimpleGroup (schema)
142
143
if (i % 4 == 0 ) {
143
144
record.add(0 , true )
You can’t perform that action at this time.
0 commit comments