@@ -255,7 +255,13 @@ func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.Ta
255
255
retFilters = b .leftover
256
256
}
257
257
258
- return ret , c .bestStat , retFilters , nil
258
+ bestStat , err := c .bestStat .WithHistogram (c .bestHist )
259
+ if err != nil {
260
+ return nil , nil , nil , err
261
+ }
262
+ bestStat = stats .UpdateCounts (bestStat )
263
+
264
+ return ret , bestStat , retFilters , nil
259
265
}
260
266
261
267
func addIndexScans (m * memo.Memo ) error {
@@ -363,6 +369,8 @@ type indexCoster struct {
363
369
idToExpr map [indexScanId ]sql.Expression
364
370
// bestStat is the lowest cardinality indexScan option
365
371
bestStat sql.Statistic
372
+ bestHist []sql.HistogramBucket
373
+ bestCnt uint64
366
374
// bestFilters is the set of conjunctions used to create bestStat
367
375
bestFilters sql.FastIntSet
368
376
// bestConstant are the constant best filters
@@ -377,29 +385,30 @@ type indexCoster struct {
377
385
func (c * indexCoster ) cost (f indexFilter , stat sql.Statistic , idx sql.Index ) error {
378
386
ordinals := ordinalsForStat (stat )
379
387
380
- newStat := stat
388
+ var newHist []sql.HistogramBucket
389
+ var newFds * sql.FuncDepSet
381
390
var filters sql.FastIntSet
382
391
var prefix int
383
392
var err error
384
393
var ok bool
385
394
386
395
switch f := f .(type ) {
387
396
case * iScanAnd :
388
- newStat , filters , prefix , err = c .costIndexScanAnd (f , stat , ordinals , idx )
397
+ newHist , newFds , filters , prefix , err = c .costIndexScanAnd (f , stat , stat . Histogram () , ordinals , idx )
389
398
if err != nil {
390
399
return err
391
400
}
392
401
393
402
case * iScanOr :
394
- newStat , ok , err = c .costIndexScanOr (f , stat , ordinals , idx )
403
+ newHist , newFds , ok , err = c .costIndexScanOr (f , stat , stat . Histogram () , ordinals , idx )
395
404
if err != nil {
396
405
return err
397
406
}
398
407
if ok {
399
408
filters .Add (int (f .id ))
400
409
}
401
410
case * iScanLeaf :
402
- newStat , ok , prefix , err = c .costIndexScanLeaf (f , stat , ordinals , idx )
411
+ newHist , newFds , ok , prefix , err = c .costIndexScanLeaf (f , stat , stat . Histogram () , ordinals , idx )
403
412
if err != nil {
404
413
return err
405
414
}
@@ -410,25 +419,33 @@ func (c *indexCoster) cost(f indexFilter, stat sql.Statistic, idx sql.Index) err
410
419
panic ("unreachable" )
411
420
}
412
421
413
- c .updateBest (newStat , filters , prefix )
422
+ if newFds == nil {
423
+ newFds = & sql.FuncDepSet {}
424
+ }
425
+
426
+ c .updateBest (stat , newHist , newFds , filters , prefix )
427
+
414
428
return nil
415
429
}
416
430
417
- func (c * indexCoster ) updateBest (s sql.Statistic , filters sql.FastIntSet , prefix int ) {
431
+ func (c * indexCoster ) updateBest (s sql.Statistic , hist []sql. HistogramBucket , fds * sql. FuncDepSet , filters sql.FastIntSet , prefix int ) {
418
432
if s == nil || filters .Len () == 0 {
419
433
return
420
434
}
435
+ rowCnt , _ , _ := stats .GetNewCounts (hist )
421
436
422
437
var update bool
423
438
defer func () {
424
439
if update {
425
- c .bestStat = s
440
+ c .bestStat = s .WithFuncDeps (fds )
441
+ c .bestHist = hist
442
+ c .bestCnt = rowCnt
426
443
c .bestFilters = filters
427
444
c .bestPrefix = prefix
428
445
}
429
446
}()
430
447
431
- if c .bestStat == nil || s . RowCount () < c .bestStat . RowCount () {
448
+ if c .bestStat == nil || rowCnt < c .bestCnt {
432
449
update = true
433
450
return
434
451
} else if c .bestStat .FuncDeps ().HasMax1Row () {
@@ -437,9 +454,9 @@ func (c *indexCoster) updateBest(s sql.Statistic, filters sql.FastIntSet, prefix
437
454
// any prefix is better than no prefix
438
455
update = prefix > c .bestPrefix
439
456
return
440
- } else if s . RowCount () == c .bestStat . RowCount () {
457
+ } else if rowCnt == c .bestCnt {
441
458
// hand rules when stats don't exist or match exactly
442
- cmp := s . FuncDeps ()
459
+ cmp := fds
443
460
best := c .bestStat .FuncDeps ()
444
461
if cmp .HasMax1Row () {
445
462
update = true
@@ -1111,28 +1128,34 @@ func ordinalsForStat(stat sql.Statistic) map[string]int {
1111
1128
// updated histogram and functional dependencies, the subset of applicable
// filters, the maximum prefix
1112
1129
// key created by a subset of equality filters (from conjunction only),
1113
1130
// or an error if applicable.
1114
- func (c * indexCoster ) costIndexScanAnd (filter * iScanAnd , s sql.Statistic , ordinals map [string ]int , idx sql.Index ) (sql.Statistic , sql.FastIntSet , int , error ) {
1131
+ func (c * indexCoster ) costIndexScanAnd (filter * iScanAnd , s sql.Statistic , buckets []sql. HistogramBucket , ordinals map [string ]int , idx sql.Index ) ([] sql.HistogramBucket , * sql. FuncDepSet , sql.FastIntSet , int , error ) {
1115
1132
// first step finds the conjunctions that match index prefix columns.
1116
1133
// we divide into eqFilters and rangeFilters
1117
1134
1118
- ret := s
1135
+ ret := s . Histogram ()
1119
1136
var exact sql.FastIntSet
1120
1137
1121
1138
if len (filter .orChildren ) > 0 {
1122
1139
for _ , or := range filter .orChildren {
1123
- childStat , ok , err := c .costIndexScanOr (or .(* iScanOr ), s , ordinals , idx )
1140
+ childStat , fds , ok , err := c .costIndexScanOr (or .(* iScanOr ), s , buckets , ordinals , idx )
1124
1141
if err != nil {
1125
- return nil , sql.FastIntSet {}, 0 , err
1142
+ return nil , nil , sql.FastIntSet {}, 0 , err
1126
1143
}
1127
1144
// if valid, INTERSECT
1128
1145
if ok {
1129
- ret = stats .Intersect (ret , childStat )
1146
+ if fds != nil {
1147
+ s = s .WithFuncDeps (fds )
1148
+ }
1149
+ ret , err = stats .Intersect (ret , childStat , s .Types ())
1150
+ if err != nil {
1151
+ return nil , nil , sql.FastIntSet {}, 0 , err
1152
+ }
1130
1153
exact .Add (int (or .Id ()))
1131
1154
}
1132
1155
}
1133
1156
}
1134
1157
1135
- conj := newConjCollector (ret , ordinals )
1158
+ conj := newConjCollector (s , ret , ordinals )
1136
1159
for _ , c := range s .Columns () {
1137
1160
if colFilters , ok := filter .leafChildren [c ]; ok {
1138
1161
for _ , f := range colFilters {
@@ -1143,46 +1166,58 @@ func (c *indexCoster) costIndexScanAnd(filter *iScanAnd, s sql.Statistic, ordina
1143
1166
1144
1167
if exact .Len ()+ conj .applied .Len () == filter .childCnt () {
1145
1168
// matched all filters
1146
- return conj .stat , sql .NewFastIntSet (int (filter .id )), conj .missingPrefix , nil
1169
+ return conj .hist , conj . fds , sql .NewFastIntSet (int (filter .id )), conj .missingPrefix , nil
1147
1170
}
1148
1171
1149
- return conj .stat , exact .Union (conj .applied ), conj .missingPrefix , nil
1172
+ return conj .hist , conj . fds , exact .Union (conj .applied ), conj .missingPrefix , nil
1150
1173
}
1151
1174
1152
- func (c * indexCoster ) costIndexScanOr (filter * iScanOr , s sql.Statistic , ordinals map [string ]int , idx sql.Index ) (sql.Statistic , bool , error ) {
1175
+ func (c * indexCoster ) costIndexScanOr (filter * iScanOr , s sql.Statistic , buckets []sql. HistogramBucket , ordinals map [string ]int , idx sql.Index ) ([] sql.HistogramBucket , * sql. FuncDepSet , bool , error ) {
1153
1176
// OR just unions the statistics from each child?
1154
1177
// if one of the children is invalid, we balk and return false
1155
1178
// otherwise we union the buckets between the children
1156
- ret := s
1179
+ ret := buckets
1157
1180
for _ , child := range filter .children {
1158
1181
switch child := child .(type ) {
1159
1182
case * iScanAnd :
1160
- childStat , ids , _ , err := c .costIndexScanAnd (child , s , ordinals , idx )
1183
+ childBuckets , fds , ids , _ , err := c .costIndexScanAnd (child , s , buckets , ordinals , idx )
1161
1184
if err != nil {
1162
- return nil , false , err
1185
+ return nil , nil , false , err
1163
1186
}
1164
1187
if ids .Len () != 1 || ! ids .Contains (int (child .Id ())) {
1165
1188
// scan option missed some filters
1166
- return nil , false , nil
1189
+ return nil , nil , false , nil
1190
+ }
1191
+ if fds != nil {
1192
+ s = s .WithFuncDeps (fds )
1193
+ }
1194
+ ret , err = stats .Union (buckets , childBuckets , s .Types ())
1195
+ if err != nil {
1196
+ return nil , nil , false , err
1167
1197
}
1168
- ret = stats .Union (s , childStat )
1169
1198
1170
1199
case * iScanLeaf :
1171
1200
var ok bool
1172
- childStat , ok , _ , err := c .costIndexScanLeaf (child , s , ordinals , idx )
1201
+ childBuckets , fds , ok , _ , err := c .costIndexScanLeaf (child , s , ret , ordinals , idx )
1173
1202
if err != nil {
1174
- return nil , false , err
1203
+ return nil , nil , false , err
1175
1204
}
1176
1205
if ! ok {
1177
- return nil , false , nil
1206
+ return nil , nil , false , nil
1207
+ }
1208
+ if fds != nil {
1209
+ s = s .WithFuncDeps (fds )
1210
+ }
1211
+ ret , err = stats .Union (ret , childBuckets , s .Types ())
1212
+ if err != nil {
1213
+ return nil , nil , false , err
1178
1214
}
1179
- ret = stats .Union (s , childStat )
1180
1215
1181
1216
default :
1182
- return nil , false , fmt .Errorf ("invalid *iScanOr child: %T" , child )
1217
+ return nil , nil , false , fmt .Errorf ("invalid *iScanOr child: %T" , child )
1183
1218
}
1184
1219
}
1185
- return ret , true , nil
1220
+ return ret , nil , true , nil
1186
1221
}
1187
1222
1188
1223
// indexHasContentHashedFieldForFilter returns true if the given index |idx| has a content-hashed field that is used
@@ -1212,10 +1247,10 @@ func indexHasContentHashedFieldForFilter(filter *iScanLeaf, idx sql.Index, ordin
1212
1247
// costIndexScanLeaf tries to apply a leaf filter to an index represented
1213
1248
// by a statistic, returning the resulting histogram buckets and functional
// dependencies, whether the filter was
1214
1249
// applicable, and the maximum prefix key (0 or 1 for a leaf).
1215
- func (c * indexCoster ) costIndexScanLeaf (filter * iScanLeaf , s sql.Statistic , ordinals map [string ]int , idx sql.Index ) (sql.Statistic , bool , int , error ) {
1250
+ func (c * indexCoster ) costIndexScanLeaf (filter * iScanLeaf , s sql.Statistic , buckets []sql. HistogramBucket , ordinals map [string ]int , idx sql.Index ) ([] sql.HistogramBucket , * sql. FuncDepSet , bool , int , error ) {
1216
1251
ord , ok := ordinals [strings .ToLower (filter .gf .Name ())]
1217
1252
if ! ok {
1218
- return nil , false , 0 , nil
1253
+ return nil , nil , false , 0 , nil
1219
1254
}
1220
1255
1221
1256
// indexes with content-hashed fields can be used to test equality or compare with NULL,
@@ -1224,21 +1259,21 @@ func (c *indexCoster) costIndexScanLeaf(filter *iScanLeaf, s sql.Statistic, ordi
1224
1259
switch filter .op {
1225
1260
case indexScanOpEq , indexScanOpNotEq , indexScanOpNullSafeEq , indexScanOpIsNull , indexScanOpIsNotNull :
1226
1261
default :
1227
- return nil , false , 0 , nil
1262
+ return nil , nil , false , 0 , nil
1228
1263
}
1229
1264
}
1230
1265
1231
1266
switch filter .op {
1232
1267
case indexScanOpSpatialEq :
1233
1268
stat , ok , err := c .costSpatial (filter , s , ord )
1234
- return stat , ok , 0 , err
1269
+ return buckets , stat . FuncDeps () , ok , 0 , err
1235
1270
case indexScanOpFulltextEq :
1236
1271
stat , ok , err := c .costFulltext (filter , s , ord )
1237
- return stat , ok , 0 , err
1272
+ return buckets , stat . FuncDeps () , ok , 0 , err
1238
1273
default :
1239
- conj := newConjCollector (s , ordinals )
1274
+ conj := newConjCollector (s , buckets , ordinals )
1240
1275
conj .add (filter )
1241
- return conj .stat , true , conj .missingPrefix , nil
1276
+ return conj .hist , conj . fds , true , conj .missingPrefix , nil
1242
1277
}
1243
1278
}
1244
1279
@@ -1521,9 +1556,11 @@ func newUniformDistStatistic(dbName, tableName string, sch sql.Schema, idx sql.I
1521
1556
return ret , nil
1522
1557
}
1523
1558
1524
- func newConjCollector (s sql.Statistic , ordinals map [string ]int ) * conjCollector {
1559
+ func newConjCollector (s sql.Statistic , hist []sql. HistogramBucket , ordinals map [string ]int ) * conjCollector {
1525
1560
return & conjCollector {
1526
1561
stat : s ,
1562
+ hist : hist ,
1563
+ fds : s .FuncDeps (),
1527
1564
ordinals : ordinals ,
1528
1565
eqVals : make ([]interface {}, len (ordinals )),
1529
1566
nullable : make ([]bool , len (ordinals )),
@@ -1534,6 +1571,8 @@ func newConjCollector(s sql.Statistic, ordinals map[string]int) *conjCollector {
1534
1571
// an index histogram for a list of conjunctive filters
1535
1572
type conjCollector struct {
1536
1573
stat sql.Statistic
1574
+ hist []sql.HistogramBucket
1575
+ fds * sql.FuncDepSet
1537
1576
ordinals map [string ]int
1538
1577
missingPrefix int
1539
1578
constant sql.FastIntSet
@@ -1587,7 +1626,7 @@ func (c *conjCollector) addEq(col string, val interface{}, nullSafe bool) error
1587
1626
1588
1627
// truncate buckets
1589
1628
var err error
1590
- c .stat , err = stats .PrefixKey (c .stat , c .eqVals [:ord + 1 ], c .nullable )
1629
+ c .hist , c . fds , err = stats .PrefixKey (c .stat . Histogram (), c . stat . ColSet (), c . stat . Types (), c . stat . FuncDeps () , c .eqVals [:ord + 1 ], c .nullable )
1591
1630
if err != nil {
1592
1631
return err
1593
1632
}
@@ -1619,19 +1658,19 @@ func (c *conjCollector) cmpFirstCol(op indexScanOp, val interface{}) error {
1619
1658
switch op {
1620
1659
case indexScanOpNotEq :
1621
1660
// todo notEq
1622
- c .stat , err = stats .PrefixGt (c .stat , val )
1661
+ c .hist , err = stats .PrefixGt (c .hist , c . stat . Types () , val )
1623
1662
case indexScanOpGt :
1624
- c .stat , err = stats .PrefixGt (c .stat , val )
1663
+ c .hist , err = stats .PrefixGt (c .hist , c . stat . Types () , val )
1625
1664
case indexScanOpGte :
1626
- c .stat , err = stats .PrefixGte (c .stat , val )
1665
+ c .hist , err = stats .PrefixGte (c .hist , c . stat . Types () , val )
1627
1666
case indexScanOpLt :
1628
- c .stat , err = stats .PrefixLt (c .stat , val )
1667
+ c .hist , err = stats .PrefixLt (c .hist , c . stat . Types () , val )
1629
1668
case indexScanOpLte :
1630
- c .stat , err = stats .PrefixLte (c .stat , val )
1669
+ c .hist , err = stats .PrefixLte (c .hist , c . stat . Types () , val )
1631
1670
case indexScanOpIsNull :
1632
- c .stat , err = stats .PrefixIsNull (c .stat )
1671
+ c .hist , err = stats .PrefixIsNull (c .hist )
1633
1672
case indexScanOpIsNotNull :
1634
- c .stat , err = stats .PrefixIsNotNull (c .stat )
1673
+ c .hist , err = stats .PrefixIsNotNull (c .hist )
1635
1674
}
1636
1675
return err
1637
1676
}
0 commit comments