File tree Expand file tree Collapse file tree 3 files changed +9
-4
lines changed
core/src/main/scala/org/apache/spark/rdd Expand file tree Collapse file tree 3 files changed +9
-4
lines changed Original file line number Diff line number Diff line change @@ -78,14 +78,15 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Loggi
78
78
// greater than totalParts because we actually cap it at totalParts in runJob.
79
79
var numPartsToTry = 1
80
80
if (partsScanned > 0 ) {
81
- // If we didn't find any rows after the first iteration, just try all partitions next.
81
+ // If we didn't find any rows after the previous iteration, quadruple and retry.
82
82
// Otherwise, interpolate the number of partitions we need to try, but overestimate it
83
- // by 50%.
83
+ // by 50%. We also cap the estimation in the end.
84
84
if (results.size == 0 ) {
85
- numPartsToTry = totalParts - 1
85
+ numPartsToTry = totalParts * 4
86
86
} else {
87
87
// the left side of max is >=1 whenever partsScanned >= 2
88
88
numPartsToTry = ((1.5 * num * partsScanned / results.size).toInt - partsScanned) max 1
89
+ numPartsToTry = numPartsToTry min (totalParts * 4 )
89
90
}
90
91
}
91
92
Original file line number Diff line number Diff line change @@ -1079,13 +1079,15 @@ abstract class RDD[T: ClassTag](
1079
1079
// greater than totalParts because we actually cap it at totalParts in runJob.
1080
1080
var numPartsToTry = 1
1081
1081
if (partsScanned > 0 ) {
1082
- // If we didn't find any rows after the previous iteration, quadruple and retry. Otherwise,
1082
+ // If we didn't find any rows after the previous iteration, quadruple and retry. Otherwise,
1083
1083
// interpolate the number of partitions we need to try, but overestimate it by 50%.
1084
+ // We also cap the estimation in the end.
1084
1085
if (buf.size == 0 ) {
1085
1086
numPartsToTry = partsScanned * 4
1086
1087
} else {
1087
1088
// the left side of max is >=1 whenever partsScanned >= 2
1088
1089
numPartsToTry = ((1.5 * num * partsScanned / buf.size).toInt - partsScanned) max 1
1090
+ numPartsToTry = numPartsToTry min (partsScanned * 4 )
1089
1091
}
1090
1092
}
1091
1093
Original file line number Diff line number Diff line change @@ -1070,11 +1070,13 @@ def take(self, num):
1070
1070
# If we didn't find any rows after the previous iteration,
1071
1071
# quadruple and retry. Otherwise, interpolate the number of
1072
1072
# partitions we need to try, but overestimate it by 50%.
1073
+ # We also cap the estimation in the end.
1073
1074
if len (items ) == 0 :
1074
1075
numPartsToTry = partsScanned * 4
1075
1076
else :
1076
1077
# the first parameter of max is >=1 whenever partsScanned >= 2
1077
1078
numPartsToTry = max (int (1.5 * num * partsScanned / len (items )) - partsScanned , 1 )
1079
+ numPartsToTry = min (numPartsToTry , partsScanned * 4 )
1078
1080
1079
1081
left = num - len (items )
1080
1082
You can’t perform that action at this time.
0 commit comments