@@ -38,6 +38,7 @@ import org.apache.spark.sql.catalyst.plans.logical.TableParallelInfo
38
38
import org .apache .spark .sql .catalyst .plans .physical .{HashPartitioning , Partitioning , UnknownPartitioning }
39
39
import org .apache .spark .sql .catalyst .trees .TreeNodeTag
40
40
import org .apache .spark .sql .catalyst .util .truncatedString
41
+ import org .apache .spark .sql .execution .adaptive .SkewHandlingUtil
41
42
import org .apache .spark .sql .execution .datasources ._
42
43
import org .apache .spark .sql .execution .datasources .parquet .{ParquetFileFormat => ParquetSource }
43
44
import org .apache .spark .sql .execution .index .IndexMetaLookup
@@ -421,6 +422,18 @@ case class FileSourceScanExec(
421
422
tableParallelInfo.get.partitionNumber.isEmpty))
422
423
}
423
424
425
/**
 * Whether the data read by this bucketed scan is skewed across buckets.
 *
 * The selected files are grouped by the bucket id parsed from their file names, and the
 * byte sizes within each group are summed. The largest group size and the median group
 * size are then handed to `SkewHandlingUtil.isSkewed` together with the session conf,
 * which makes the actual skew decision.
 *
 * Always `false` when this is not a bucketed scan, or when no files are selected
 * (there is nothing to be skewed, and `max` of an empty array would throw).
 *
 * NOTE(review): files whose name yields no bucket id are all grouped under the same key
 * returned by `BucketingUtils.getBucketId` - confirm that is acceptable for skew detection.
 */
lazy val isBucketSkew: Boolean = {
  bucketedScan && {
    // Total bytes per bucket, accumulated over every selected partition.
    val bucketSizes = selectedPartitions
      .flatMap(_.files)
      .groupBy(file => BucketingUtils.getBucketId(file.getPath.getName))
      .values
      .map(_.map(_.getLen).sum)
      .toArray

    // Partition pruning (or an empty table) can leave no files at all; guard before
    // calling `max`, which throws UnsupportedOperationException on an empty array.
    bucketSizes.nonEmpty &&
      SkewHandlingUtil.isSkewed(
        bucketSizes.max,
        // `false`: presumably "input is not already sorted" - TODO confirm against Utils.median.
        Utils.median(bucketSizes, false),
        conf)
  }
}
436
+
424
437
override lazy val (outputPartitioning, outputOrdering): (Partitioning , Seq [SortOrder ]) = {
425
438
if (bucketedScan) {
426
439
// For bucketed columns:
@@ -522,7 +535,11 @@ case class FileSourceScanExec(
522
535
val withSelectedBucketsCount = relation.bucketSpec.map { spec =>
523
536
val bucketedKey = " Bucketed"
524
537
val withBucketedScanStatus = if (bucketedScan) {
525
- metadata + (bucketedKey -> " true" )
538
+ if (isBucketSkew) {
539
+ metadata + (bucketedKey -> s " true skewed " )
540
+ } else {
541
+ metadata + (bucketedKey -> s " true " )
542
+ }
526
543
} else if (! relation.sparkSession.sessionState.conf.bucketingEnabled) {
527
544
metadata + (bucketedKey -> " false (disabled by configuration)" )
528
545
} else if (disableBucketedScan) {
0 commit comments