Skip to content

Commit 9627c83

Browse files
authored
LocalCSE: Do not optimize small things like global.get (#6087)
LocalCSE is nice for large expressions, but for small things it has always been of unclear benefit, since VMs also do GVN/CSE anyhow. So we are likely not speeding anything up, but hopefully we are at least reducing code size. Doing LocalCSE on something small like a global.get, however, may very well increase code size (since we add a tee, and since local gets are of similar size to global gets, depending on the LEB sizes of their indices). On real-world Java code that overhead is noticeable, so this PR makes us more careful: we skip things of size 1 (no children).
1 parent 3640f9c commit 9627c83

File tree

3 files changed

+45
-47
lines changed

3 files changed

+45
-47
lines changed

src/passes/LocalCSE.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -334,13 +334,16 @@ struct Scanner
334334
// and so adding one set+one get and removing one of the items itself
335335
// is not detrimental, and may be beneficial.
336336
// TODO: investigate size 2
337-
if (options.shrinkLevel > 0 && Measurer::measure(curr) >= 3) {
337+
auto size = Measurer::measure(curr);
338+
if (options.shrinkLevel > 0 && size >= 3) {
338339
return true;
339340
}
340341

341342
// If we focus on speed, any reduction in cost is beneficial, as the
342-
// cost of a get is essentially free.
343-
if (options.shrinkLevel == 0 && CostAnalyzer(curr).cost > 0) {
343+
// cost of a get is essentially free. However, we need to balance that with
344+
// the fact that the VM will also do CSE/GVN itself, so minor improvements
345+
// are not worthwhile, so skip things of size 1 (like a global.get).
346+
if (options.shrinkLevel == 0 && CostAnalyzer(curr).cost > 0 && size >= 2) {
344347
return true;
345348
}
346349

test/lit/passes/inlining-optimizing_optimize-level=3.wast

Lines changed: 31 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -299,13 +299,11 @@
299299
;; CHECK-NEXT: (local $12 i32)
300300
;; CHECK-NEXT: (local $13 i32)
301301
;; CHECK-NEXT: (local.set $8
302-
;; CHECK-NEXT: (local.tee $4
303-
;; CHECK-NEXT: (global.get $STACKTOP)
304-
;; CHECK-NEXT: )
302+
;; CHECK-NEXT: (global.get $STACKTOP)
305303
;; CHECK-NEXT: )
306304
;; CHECK-NEXT: (global.set $STACKTOP
307305
;; CHECK-NEXT: (i32.add
308-
;; CHECK-NEXT: (local.get $4)
306+
;; CHECK-NEXT: (global.get $STACKTOP)
309307
;; CHECK-NEXT: (i32.const 16)
310308
;; CHECK-NEXT: )
311309
;; CHECK-NEXT: )
@@ -316,11 +314,12 @@
316314
;; CHECK-NEXT: )
317315
;; CHECK-NEXT: (call $abort)
318316
;; CHECK-NEXT: )
317+
;; CHECK-NEXT: (local.set $6
318+
;; CHECK-NEXT: (global.get $STACKTOP)
319+
;; CHECK-NEXT: )
319320
;; CHECK-NEXT: (global.set $STACKTOP
320321
;; CHECK-NEXT: (i32.add
321-
;; CHECK-NEXT: (local.tee $4
322-
;; CHECK-NEXT: (global.get $STACKTOP)
323-
;; CHECK-NEXT: )
322+
;; CHECK-NEXT: (global.get $STACKTOP)
324323
;; CHECK-NEXT: (i32.const 16)
325324
;; CHECK-NEXT: )
326325
;; CHECK-NEXT: )
@@ -332,19 +331,20 @@
332331
;; CHECK-NEXT: (call $abort)
333332
;; CHECK-NEXT: )
334333
;; CHECK-NEXT: (i32.store
335-
;; CHECK-NEXT: (local.get $4)
334+
;; CHECK-NEXT: (local.get $6)
336335
;; CHECK-NEXT: (local.get $8)
337336
;; CHECK-NEXT: )
338337
;; CHECK-NEXT: (local.set $0
339338
;; CHECK-NEXT: (i32.load
340339
;; CHECK-NEXT: (i32.const 8)
341340
;; CHECK-NEXT: )
342341
;; CHECK-NEXT: )
342+
;; CHECK-NEXT: (local.set $1
343+
;; CHECK-NEXT: (global.get $STACKTOP)
344+
;; CHECK-NEXT: )
343345
;; CHECK-NEXT: (global.set $STACKTOP
344346
;; CHECK-NEXT: (i32.add
345-
;; CHECK-NEXT: (local.tee $1
346-
;; CHECK-NEXT: (global.get $STACKTOP)
347-
;; CHECK-NEXT: )
347+
;; CHECK-NEXT: (global.get $STACKTOP)
348348
;; CHECK-NEXT: (i32.const 224)
349349
;; CHECK-NEXT: )
350350
;; CHECK-NEXT: )
@@ -361,13 +361,13 @@
361361
;; CHECK-NEXT: (i32.const 120)
362362
;; CHECK-NEXT: )
363363
;; CHECK-NEXT: )
364-
;; CHECK-NEXT: (local.set $5
364+
;; CHECK-NEXT: (local.set $4
365365
;; CHECK-NEXT: (i32.add
366366
;; CHECK-NEXT: (local.get $1)
367367
;; CHECK-NEXT: (i32.const 136)
368368
;; CHECK-NEXT: )
369369
;; CHECK-NEXT: )
370-
;; CHECK-NEXT: (local.set $6
370+
;; CHECK-NEXT: (local.set $5
371371
;; CHECK-NEXT: (i32.add
372372
;; CHECK-NEXT: (local.tee $3
373373
;; CHECK-NEXT: (local.tee $7
@@ -393,14 +393,14 @@
393393
;; CHECK-NEXT: (i32.const 4)
394394
;; CHECK-NEXT: )
395395
;; CHECK-NEXT: )
396-
;; CHECK-NEXT: (local.get $6)
396+
;; CHECK-NEXT: (local.get $5)
397397
;; CHECK-NEXT: )
398398
;; CHECK-NEXT: )
399399
;; CHECK-NEXT: )
400400
;; CHECK-NEXT: (i32.store
401401
;; CHECK-NEXT: (local.get $2)
402402
;; CHECK-NEXT: (i32.load
403-
;; CHECK-NEXT: (local.get $4)
403+
;; CHECK-NEXT: (local.get $6)
404404
;; CHECK-NEXT: )
405405
;; CHECK-NEXT: )
406406
;; CHECK-NEXT: (drop
@@ -444,7 +444,7 @@
444444
;; CHECK-NEXT: )
445445
;; CHECK-NEXT: (if
446446
;; CHECK-NEXT: (i32.load
447-
;; CHECK-NEXT: (local.tee $6
447+
;; CHECK-NEXT: (local.tee $5
448448
;; CHECK-NEXT: (i32.add
449449
;; CHECK-NEXT: (local.get $0)
450450
;; CHECK-NEXT: (i32.const 48)
@@ -473,7 +473,7 @@
473473
;; CHECK-NEXT: )
474474
;; CHECK-NEXT: (i32.store
475475
;; CHECK-NEXT: (local.get $9)
476-
;; CHECK-NEXT: (local.get $5)
476+
;; CHECK-NEXT: (local.get $4)
477477
;; CHECK-NEXT: )
478478
;; CHECK-NEXT: (i32.store
479479
;; CHECK-NEXT: (local.tee $12
@@ -482,7 +482,7 @@
482482
;; CHECK-NEXT: (i32.const 28)
483483
;; CHECK-NEXT: )
484484
;; CHECK-NEXT: )
485-
;; CHECK-NEXT: (local.get $5)
485+
;; CHECK-NEXT: (local.get $4)
486486
;; CHECK-NEXT: )
487487
;; CHECK-NEXT: (i32.store
488488
;; CHECK-NEXT: (local.tee $11
@@ -491,10 +491,10 @@
491491
;; CHECK-NEXT: (i32.const 20)
492492
;; CHECK-NEXT: )
493493
;; CHECK-NEXT: )
494-
;; CHECK-NEXT: (local.get $5)
494+
;; CHECK-NEXT: (local.get $4)
495495
;; CHECK-NEXT: )
496496
;; CHECK-NEXT: (i32.store
497-
;; CHECK-NEXT: (local.get $6)
497+
;; CHECK-NEXT: (local.get $5)
498498
;; CHECK-NEXT: (i32.const 80)
499499
;; CHECK-NEXT: )
500500
;; CHECK-NEXT: (i32.store
@@ -505,7 +505,7 @@
505505
;; CHECK-NEXT: )
506506
;; CHECK-NEXT: )
507507
;; CHECK-NEXT: (i32.add
508-
;; CHECK-NEXT: (local.get $5)
508+
;; CHECK-NEXT: (local.get $4)
509509
;; CHECK-NEXT: (i32.const 80)
510510
;; CHECK-NEXT: )
511511
;; CHECK-NEXT: )
@@ -547,7 +547,7 @@
547547
;; CHECK-NEXT: (local.get $10)
548548
;; CHECK-NEXT: )
549549
;; CHECK-NEXT: (i32.store
550-
;; CHECK-NEXT: (local.get $6)
550+
;; CHECK-NEXT: (local.get $5)
551551
;; CHECK-NEXT: (i32.const 0)
552552
;; CHECK-NEXT: )
553553
;; CHECK-NEXT: (i32.store
@@ -586,7 +586,7 @@
586586
;; CHECK-NEXT: (local.get $1)
587587
;; CHECK-NEXT: )
588588
;; CHECK-NEXT: (global.set $STACKTOP
589-
;; CHECK-NEXT: (local.get $4)
589+
;; CHECK-NEXT: (local.get $6)
590590
;; CHECK-NEXT: )
591591
;; CHECK-NEXT: (global.set $STACKTOP
592592
;; CHECK-NEXT: (local.get $8)
@@ -4080,13 +4080,11 @@
40804080
;; CHECK-NEXT: (local $44 i32)
40814081
;; CHECK-NEXT: (local $45 i32)
40824082
;; CHECK-NEXT: (local.set $13
4083-
;; CHECK-NEXT: (local.tee $5
4084-
;; CHECK-NEXT: (global.get $STACKTOP)
4085-
;; CHECK-NEXT: )
4083+
;; CHECK-NEXT: (global.get $STACKTOP)
40864084
;; CHECK-NEXT: )
40874085
;; CHECK-NEXT: (global.set $STACKTOP
40884086
;; CHECK-NEXT: (i32.add
4089-
;; CHECK-NEXT: (local.get $5)
4087+
;; CHECK-NEXT: (global.get $STACKTOP)
40904088
;; CHECK-NEXT: (i32.const 624)
40914089
;; CHECK-NEXT: )
40924090
;; CHECK-NEXT: )
@@ -5785,21 +5783,19 @@
57855783
;; CHECK-NEXT: (i32.const 0)
57865784
;; CHECK-NEXT: )
57875785
;; CHECK-NEXT: (f64.store
5788-
;; CHECK-NEXT: (local.tee $5
5789-
;; CHECK-NEXT: (global.get $tempDoublePtr)
5790-
;; CHECK-NEXT: )
5786+
;; CHECK-NEXT: (global.get $tempDoublePtr)
57915787
;; CHECK-NEXT: (local.get $14)
57925788
;; CHECK-NEXT: )
57935789
;; CHECK-NEXT: (drop
57945790
;; CHECK-NEXT: (i32.load
5795-
;; CHECK-NEXT: (local.get $5)
5791+
;; CHECK-NEXT: (global.get $tempDoublePtr)
57965792
;; CHECK-NEXT: )
57975793
;; CHECK-NEXT: )
57985794
;; CHECK-NEXT: (local.set $30
57995795
;; CHECK-NEXT: (if (result i32)
58005796
;; CHECK-NEXT: (i32.lt_s
58015797
;; CHECK-NEXT: (i32.load offset=4
5802-
;; CHECK-NEXT: (local.get $5)
5798+
;; CHECK-NEXT: (global.get $tempDoublePtr)
58035799
;; CHECK-NEXT: )
58045800
;; CHECK-NEXT: (i32.const 0)
58055801
;; CHECK-NEXT: )
@@ -5844,22 +5840,20 @@
58445840
;; CHECK-NEXT: )
58455841
;; CHECK-NEXT: )
58465842
;; CHECK-NEXT: (f64.store
5847-
;; CHECK-NEXT: (local.tee $5
5848-
;; CHECK-NEXT: (global.get $tempDoublePtr)
5849-
;; CHECK-NEXT: )
5843+
;; CHECK-NEXT: (global.get $tempDoublePtr)
58505844
;; CHECK-NEXT: (local.get $14)
58515845
;; CHECK-NEXT: )
58525846
;; CHECK-NEXT: (drop
58535847
;; CHECK-NEXT: (i32.load
5854-
;; CHECK-NEXT: (local.get $5)
5848+
;; CHECK-NEXT: (global.get $tempDoublePtr)
58555849
;; CHECK-NEXT: )
58565850
;; CHECK-NEXT: )
58575851
;; CHECK-NEXT: (local.set $7
58585852
;; CHECK-NEXT: (if (result i32)
58595853
;; CHECK-NEXT: (i32.lt_u
58605854
;; CHECK-NEXT: (i32.and
58615855
;; CHECK-NEXT: (i32.load offset=4
5862-
;; CHECK-NEXT: (local.get $5)
5856+
;; CHECK-NEXT: (global.get $tempDoublePtr)
58635857
;; CHECK-NEXT: )
58645858
;; CHECK-NEXT: (i32.const 2146435072)
58655859
;; CHECK-NEXT: )

test/lit/passes/local-cse.wast

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -447,20 +447,17 @@
447447
(global $other-glob (mut i32) (i32.const 1))
448448

449449
;; CHECK: (func $global
450-
;; CHECK-NEXT: (local $0 i32)
451450
;; CHECK-NEXT: (drop
452-
;; CHECK-NEXT: (local.tee $0
453-
;; CHECK-NEXT: (global.get $glob)
454-
;; CHECK-NEXT: )
451+
;; CHECK-NEXT: (global.get $glob)
455452
;; CHECK-NEXT: )
456453
;; CHECK-NEXT: (drop
457-
;; CHECK-NEXT: (local.get $0)
454+
;; CHECK-NEXT: (global.get $glob)
458455
;; CHECK-NEXT: )
459456
;; CHECK-NEXT: (global.set $other-glob
460457
;; CHECK-NEXT: (i32.const 100)
461458
;; CHECK-NEXT: )
462459
;; CHECK-NEXT: (drop
463-
;; CHECK-NEXT: (local.get $0)
460+
;; CHECK-NEXT: (global.get $glob)
464461
;; CHECK-NEXT: )
465462
;; CHECK-NEXT: (global.set $glob
466463
;; CHECK-NEXT: (i32.const 200)
@@ -470,7 +467,11 @@
470467
;; CHECK-NEXT: )
471468
;; CHECK-NEXT: )
472469
(func $global
473-
;; We should optimize redundant global.get operations.
470+
;; We should not optimize redundant global.get operations: they are of size
471+
;; 1 (no children), and so we may end up increasing code size here for
472+
;; unclear benefit. The benefit is unclear since VMs already do GVN/CSE
473+
;; themselves, and so we focus on things of size 2 and above, where we
474+
;; definitely reduce code size at least.
474475
(drop (global.get $glob))
475476
(drop (global.get $glob))
476477
;; We can do it past a write to another global

0 commit comments

Comments
 (0)