Skip to content

Commit 1e353f0

Browse files
committed
[SimplifyCFG] Start redesigning FoldTwoEntryPHINode().
The current `FoldTwoEntryPHINode()` is not quite designed correctly. It starts from the merge point, and then tries to detect the 'divergence' point. Because of that, it is limited to the simple two-predecessor case, where the PHI completely goes away. but that is rather pessimistic, and it doesn't make much sense from the costmodel side of things. For example if there is some other unrelated predecessor of the merge point, we could split the merge point so that the then/else blocks first branch to an empty block and then to the merge point, and then we'd be able to speculate the then/else code. But if we'd instead simply start at the divergence point, and look for the merge point, then we'll just natively support this case. There's also the fact that `SpeculativelyExecuteBB()` already does just that, but only if there is a single block to speculate, and with a much more restrictive cost model. But that also means we have code duplication. Now, sadly, while this is as much NFCI as possible, there is just no way to cleanly migrate to the proper implementation. The results *are* going to be different somewhat because of various phase ordering effects and SimplifyCFG block iteration strategy.
1 parent 73cb542 commit 1e353f0

15 files changed

+383
-287
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 80 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2960,6 +2960,81 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
29602960
return true;
29612961
}
29622962

2963+
static bool SpeculativelyExecuteThenElseCode(BranchInst *BI,
2964+
const TargetTransformInfo &TTI,
2965+
DomTreeUpdater *DTU,
2966+
const DataLayout &DL) {
2967+
assert(BI->isConditional() && !isa<ConstantInt>(BI->getCondition()) &&
2968+
BI->getSuccessor(0) != BI->getSuccessor(1) &&
2969+
"Only for truly conditional branches.");
2970+
BasicBlock *BB = BI->getParent();
2971+
2972+
// Which ones of our successors end up with an unconditional branch?
2973+
SmallVector<BasicBlock *, 2> UncondSuccessors;
2974+
SmallVector<BasicBlock *, 2> OtherSuccessors;
2975+
for (BasicBlock *Succ : successors(BI)) {
2976+
auto *SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
2977+
if (SuccBI && SuccBI->isUnconditional())
2978+
UncondSuccessors.emplace_back(Succ);
2979+
else
2980+
OtherSuccessors.emplace_back(Succ);
2981+
}
2982+
assert(UncondSuccessors.size() + OtherSuccessors.size() == 2 &&
2983+
"Can not have more than two successors!");
2984+
2985+
// If none do, then we can't do anything.
2986+
if (UncondSuccessors.empty())
2987+
return false;
2988+
2989+
// We want to hoist code from the unconditional block[s] and eliminate them,
2990+
// but if they have their address taken, then we essentially can't do this.
2991+
for (BasicBlock *UncondSucc : UncondSuccessors)
2992+
if (UncondSucc->hasAddressTaken())
2993+
return false;
2994+
2995+
// All unconditional successors must have a single (and the same) predecessor.
2996+
// FIXME: lift this restriction.
2997+
for (BasicBlock *UncondSucc : UncondSuccessors)
2998+
if (!UncondSucc->getSinglePredecessor())
2999+
return false;
3000+
3001+
// Now, what is the merge point?
3002+
BasicBlock *MergeBB = nullptr;
3003+
// If there was only a single unconditional successor,
3004+
// then the other successor *must* be the merge point.
3005+
if (UncondSuccessors.size() == 1)
3006+
MergeBB = OtherSuccessors.front();
3007+
3008+
// All unconditional successors must have the same successor themselves.
3009+
for (BasicBlock *UncondSucc : UncondSuccessors) {
3010+
auto *SuccBI = cast<BranchInst>(UncondSucc->getTerminator());
3011+
assert(SuccBI->isUnconditional() && "Should be an unconditional branch.");
3012+
BasicBlock *SuccOfSucc = SuccBI->getSuccessor(0);
3013+
if (!MergeBB) // First unconditional successor, record it's successor.
3014+
MergeBB = SuccOfSucc;
3015+
else if (SuccOfSucc != MergeBB) // Do all succs have the same successor?
3016+
return false;
3017+
}
3018+
3019+
assert(MergeBB && "Should have found the merge point.");
3020+
assert(all_of(UncondSuccessors,
3021+
[MergeBB](BasicBlock *UncondSucc) {
3022+
return is_contained(predecessors(MergeBB), UncondSucc);
3023+
}) &&
3024+
"All unconditional successors must be predecessors of merge block.");
3025+
assert((UncondSuccessors.size() != 1 ||
3026+
is_contained(predecessors(MergeBB), BB)) &&
3027+
"If there is only a single unconditional successor, then the dispatch "
3028+
"block must also be merge block's predecessor.");
3029+
3030+
if (auto *PN = dyn_cast<PHINode>(MergeBB->begin()))
3031+
// FIXME: lift this restriction.
3032+
if (PN->getNumIncomingValues() == 2)
3033+
return FoldTwoEntryPHINode(PN, TTI, DTU, DL);
3034+
3035+
return false;
3036+
}
3037+
29633038
static Value *createLogicalOp(IRBuilderBase &Builder,
29643039
Instruction::BinaryOps Opc, Value *LHS,
29653040
Value *RHS, const Twine &Name = "") {
@@ -6521,6 +6596,11 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
65216596
return requestResimplify();
65226597
}
65236598

6599+
if (Options.FoldTwoEntryPHINode) {
6600+
if (SpeculativelyExecuteThenElseCode(BI, TTI, DTU, DL))
6601+
return true;
6602+
}
6603+
65246604
// If this is a branch on a phi node in the current block, thread control
65256605
// through this block if any PHI node entries are constants.
65266606
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
@@ -6736,15 +6816,6 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
67366816

67376817
IRBuilder<> Builder(BB);
67386818

6739-
if (Options.FoldTwoEntryPHINode) {
6740-
// If there is a trivial two-entry PHI node in this basic block, and we can
6741-
// eliminate it, do so now.
6742-
if (auto *PN = dyn_cast<PHINode>(BB->begin()))
6743-
if (PN->getNumIncomingValues() == 2)
6744-
if (FoldTwoEntryPHINode(PN, TTI, DTU, DL))
6745-
return true;
6746-
}
6747-
67486819
Instruction *Terminator = BB->getTerminator();
67496820
Builder.SetInsertPoint(Terminator);
67506821
switch (Terminator->getOpcode()) {

llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll

Lines changed: 44 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@
1313
define i32 @f_i8_sign_extend_inreg(i8 %in, i32 %a, i32 %b) nounwind {
1414
; CHECK-LABEL: f_i8_sign_extend_inreg:
1515
; CHECK: // %bb.0: // %entry
16-
; CHECK-NEXT: sxtb w8, w0
17-
; CHECK-NEXT: cmp w8, #0
18-
; CHECK-NEXT: csel w8, w1, w2, ge
19-
; CHECK-NEXT: add w0, w8, w0, uxtb
16+
; CHECK-NEXT: and w8, w0, #0xff
17+
; CHECK-NEXT: sxtb w9, w0
18+
; CHECK-NEXT: add w10, w8, w1
19+
; CHECK-NEXT: add w8, w8, w2
20+
; CHECK-NEXT: cmp w9, #0
21+
; CHECK-NEXT: csel w0, w10, w8, ge
2022
; CHECK-NEXT: ret
2123
entry:
2224
%cmp = icmp sgt i8 %in, -1
@@ -35,10 +37,12 @@ B:
3537
define i32 @f_i16_sign_extend_inreg(i16 %in, i32 %a, i32 %b) nounwind {
3638
; CHECK-LABEL: f_i16_sign_extend_inreg:
3739
; CHECK: // %bb.0: // %entry
38-
; CHECK-NEXT: sxth w8, w0
39-
; CHECK-NEXT: cmp w8, #0
40-
; CHECK-NEXT: csel w8, w1, w2, ge
41-
; CHECK-NEXT: add w0, w8, w0, uxth
40+
; CHECK-NEXT: and w8, w0, #0xffff
41+
; CHECK-NEXT: sxth w9, w0
42+
; CHECK-NEXT: add w10, w8, w1
43+
; CHECK-NEXT: add w8, w8, w2
44+
; CHECK-NEXT: cmp w9, #0
45+
; CHECK-NEXT: csel w0, w10, w8, ge
4246
; CHECK-NEXT: ret
4347
entry:
4448
%cmp = icmp sgt i16 %in, -1
@@ -57,9 +61,11 @@ B:
5761
define i64 @f_i32_sign_extend_inreg(i32 %in, i64 %a, i64 %b) nounwind {
5862
; CHECK-LABEL: f_i32_sign_extend_inreg:
5963
; CHECK: // %bb.0: // %entry
64+
; CHECK-NEXT: mov w8, w0
6065
; CHECK-NEXT: cmp w0, #0
61-
; CHECK-NEXT: csel x8, x1, x2, ge
62-
; CHECK-NEXT: add x0, x8, w0, uxtw
66+
; CHECK-NEXT: add x9, x8, x1
67+
; CHECK-NEXT: add x8, x8, x2
68+
; CHECK-NEXT: csel x0, x9, x8, ge
6369
; CHECK-NEXT: ret
6470
entry:
6571
%cmp = icmp sgt i32 %in, -1
@@ -78,10 +84,12 @@ B:
7884
define i32 @g_i8_sign_extend_inreg(i8 %in, i32 %a, i32 %b) nounwind {
7985
; CHECK-LABEL: g_i8_sign_extend_inreg:
8086
; CHECK: // %bb.0: // %entry
81-
; CHECK-NEXT: sxtb w8, w0
82-
; CHECK-NEXT: cmp w8, #0
83-
; CHECK-NEXT: csel w8, w1, w2, lt
84-
; CHECK-NEXT: add w0, w8, w0, uxtb
87+
; CHECK-NEXT: and w8, w0, #0xff
88+
; CHECK-NEXT: sxtb w9, w0
89+
; CHECK-NEXT: add w10, w8, w1
90+
; CHECK-NEXT: add w8, w8, w2
91+
; CHECK-NEXT: cmp w9, #0
92+
; CHECK-NEXT: csel w0, w10, w8, lt
8593
; CHECK-NEXT: ret
8694
entry:
8795
%cmp = icmp slt i8 %in, 0
@@ -100,10 +108,12 @@ B:
100108
define i32 @g_i16_sign_extend_inreg(i16 %in, i32 %a, i32 %b) nounwind {
101109
; CHECK-LABEL: g_i16_sign_extend_inreg:
102110
; CHECK: // %bb.0: // %entry
103-
; CHECK-NEXT: sxth w8, w0
104-
; CHECK-NEXT: cmp w8, #0
105-
; CHECK-NEXT: csel w8, w1, w2, lt
106-
; CHECK-NEXT: add w0, w8, w0, uxth
111+
; CHECK-NEXT: and w8, w0, #0xffff
112+
; CHECK-NEXT: sxth w9, w0
113+
; CHECK-NEXT: add w10, w8, w1
114+
; CHECK-NEXT: add w8, w8, w2
115+
; CHECK-NEXT: cmp w9, #0
116+
; CHECK-NEXT: csel w0, w10, w8, lt
107117
; CHECK-NEXT: ret
108118
entry:
109119
%cmp = icmp slt i16 %in, 0
@@ -122,9 +132,11 @@ B:
122132
define i64 @g_i32_sign_extend_inreg(i32 %in, i64 %a, i64 %b) nounwind {
123133
; CHECK-LABEL: g_i32_sign_extend_inreg:
124134
; CHECK: // %bb.0: // %entry
135+
; CHECK-NEXT: mov w8, w0
125136
; CHECK-NEXT: cmp w0, #0
126-
; CHECK-NEXT: csel x8, x1, x2, lt
127-
; CHECK-NEXT: add x0, x8, w0, uxtw
137+
; CHECK-NEXT: add x9, x8, x1
138+
; CHECK-NEXT: add x8, x8, x2
139+
; CHECK-NEXT: csel x0, x9, x8, lt
128140
; CHECK-NEXT: ret
129141
entry:
130142
%cmp = icmp slt i32 %in, 0
@@ -144,10 +156,12 @@ define i64 @f_i32_sign_extend_i64(i32 %in, i64 %a, i64 %b) nounwind {
144156
; CHECK-LABEL: f_i32_sign_extend_i64:
145157
; CHECK: // %bb.0: // %entry
146158
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
147-
; CHECK-NEXT: sxtw x8, w0
148-
; CHECK-NEXT: cmp x8, #0
149-
; CHECK-NEXT: csel x8, x1, x2, ge
150-
; CHECK-NEXT: add x0, x8, w0, uxtw
159+
; CHECK-NEXT: mov w8, w0
160+
; CHECK-NEXT: sxtw x9, w0
161+
; CHECK-NEXT: add x10, x8, x1
162+
; CHECK-NEXT: add x8, x8, x2
163+
; CHECK-NEXT: cmp x9, #0
164+
; CHECK-NEXT: csel x0, x10, x8, ge
151165
; CHECK-NEXT: ret
152166
entry:
153167
%inext = sext i32 %in to i64
@@ -168,10 +182,12 @@ define i64 @g_i32_sign_extend_i64(i32 %in, i64 %a, i64 %b) nounwind {
168182
; CHECK-LABEL: g_i32_sign_extend_i64:
169183
; CHECK: // %bb.0: // %entry
170184
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
171-
; CHECK-NEXT: sxtw x8, w0
172-
; CHECK-NEXT: cmp x8, #0
173-
; CHECK-NEXT: csel x8, x1, x2, lt
174-
; CHECK-NEXT: add x0, x8, w0, uxtw
185+
; CHECK-NEXT: mov w8, w0
186+
; CHECK-NEXT: sxtw x9, w0
187+
; CHECK-NEXT: add x10, x8, x1
188+
; CHECK-NEXT: add x8, x8, x2
189+
; CHECK-NEXT: cmp x9, #0
190+
; CHECK-NEXT: csel x0, x10, x8, lt
175191
; CHECK-NEXT: ret
176192
entry:
177193
%inext = sext i32 %in to i64

llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -654,7 +654,7 @@ define i32 @fcmpri(i32 %argc, i8** nocapture readonly %argv) {
654654
; CHECK-NEXT: cbz x8, .LBB9_3
655655
; CHECK-NEXT: // %bb.2:
656656
; CHECK-NEXT: mov w0, #3
657-
; CHECK-NEXT: b .LBB9_4
657+
; CHECK-NEXT: b .LBB9_6
658658
; CHECK-NEXT: .LBB9_3: // %if.end
659659
; CHECK-NEXT: mov w0, #1
660660
; CHECK-NEXT: bl zoo
@@ -666,14 +666,17 @@ define i32 @fcmpri(i32 %argc, i8** nocapture readonly %argv) {
666666
; CHECK-NEXT: cinc w0, w19, gt
667667
; CHECK-NEXT: fmov d8, d0
668668
; CHECK-NEXT: bl xoo
669-
; CHECK-NEXT: fmov d0, #-1.00000000
670669
; CHECK-NEXT: fcmp d8, #0.0
670+
; CHECK-NEXT: b.gt .LBB9_5
671+
; CHECK-NEXT: // %bb.4: // %cond.false12
672+
; CHECK-NEXT: fmov d0, #-1.00000000
673+
; CHECK-NEXT: fadd d8, d8, d0
674+
; CHECK-NEXT: .LBB9_5: // %cond.end14
675+
; CHECK-NEXT: fmov d0, d8
671676
; CHECK-NEXT: fmov d1, #-2.00000000
672-
; CHECK-NEXT: fadd d0, d8, d0
673-
; CHECK-NEXT: fcsel d0, d8, d0, gt
674677
; CHECK-NEXT: bl woo
675678
; CHECK-NEXT: mov w0, #4
676-
; CHECK-NEXT: .LBB9_4: // %return
679+
; CHECK-NEXT: .LBB9_6: // %return
677680
; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
678681
; CHECK-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload
679682
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/typepromotion-phisret.ll

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -103,12 +103,11 @@ define void @phi_i16() {
103103
; CHECK-LABEL: phi_i16:
104104
; CHECK: // %bb.0: // %entry
105105
; CHECK-NEXT: mov w8, wzr
106-
; CHECK-NEXT: mov w9, #1
107106
; CHECK-NEXT: .LBB2_1: // %loop
108107
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
108+
; CHECK-NEXT: add w9, w8, #2
109109
; CHECK-NEXT: cmp w8, #128
110-
; CHECK-NEXT: cinc w10, w9, lo
111-
; CHECK-NEXT: add w8, w8, w10
110+
; CHECK-NEXT: csinc w8, w9, w8, lo
112111
; CHECK-NEXT: cmp w8, #253
113112
; CHECK-NEXT: b.lo .LBB2_1
114113
; CHECK-NEXT: // %bb.2: // %exit
@@ -142,12 +141,11 @@ define i8 @ret_i8() {
142141
; CHECK-LABEL: ret_i8:
143142
; CHECK: // %bb.0: // %entry
144143
; CHECK-NEXT: mov w0, wzr
145-
; CHECK-NEXT: mov w8, #1
146144
; CHECK-NEXT: .LBB3_1: // %loop
147145
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
146+
; CHECK-NEXT: add w8, w0, #2
148147
; CHECK-NEXT: cmp w0, #128
149-
; CHECK-NEXT: cinc w9, w8, lo
150-
; CHECK-NEXT: add w0, w0, w9
148+
; CHECK-NEXT: csinc w0, w8, w0, lo
151149
; CHECK-NEXT: cmp w0, #252
152150
; CHECK-NEXT: b.hi .LBB3_1
153151
; CHECK-NEXT: // %bb.2: // %exit
@@ -180,14 +178,13 @@ exit: ; preds = %if.end
180178
define i16 @phi_multiple_undefs(i16 zeroext %arg) {
181179
; CHECK-LABEL: phi_multiple_undefs:
182180
; CHECK: // %bb.0: // %entry
183-
; CHECK-NEXT: mov w8, #1
184-
; CHECK-NEXT: // implicit-def: $w9
181+
; CHECK-NEXT: // implicit-def: $w8
185182
; CHECK-NEXT: .LBB4_1: // %loop
186183
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
187-
; CHECK-NEXT: cmp w9, #128
188-
; CHECK-NEXT: cinc w10, w8, lo
189-
; CHECK-NEXT: add w9, w9, w10
190-
; CHECK-NEXT: cmp w9, #253
184+
; CHECK-NEXT: add w9, w8, #2
185+
; CHECK-NEXT: cmp w8, #128
186+
; CHECK-NEXT: csinc w8, w9, w8, lo
187+
; CHECK-NEXT: cmp w8, #253
191188
; CHECK-NEXT: b.lo .LBB4_1
192189
; CHECK-NEXT: // %bb.2: // %exit
193190
; CHECK-NEXT: ret

llvm/test/CodeGen/ARM/ifcvt-callback.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@
88
define i32 @test_ifcvt(i32 %a, i32 %b) #0 {
99
; CHECK-LABEL: test_ifcvt:
1010
; CHECK: @ %bb.0: @ %common.ret
11-
; CHECK-NEXT: movs r2, #1
11+
; CHECK-NEXT: adds r2, r1, #1
1212
; CHECK-NEXT: cmp r0, #0
1313
; CHECK-NEXT: it eq
14-
; CHECK-NEXT: moveq.w r2, #-1
15-
; CHECK-NEXT: adds r0, r1, r2
14+
; CHECK-NEXT: subeq r2, r1, #1
15+
; CHECK-NEXT: mov r0, r2
1616
; CHECK-NEXT: bx lr
1717
%tmp2 = icmp eq i32 %a, 0
1818
br i1 %tmp2, label %cond_false, label %cond_true

llvm/test/CodeGen/ARM/ifcvt1.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,18 @@
55
define i32 @t1(i32 %a, i32 %b) {
66
; A8-LABEL: t1:
77
; A8: @ %bb.0: @ %common.ret
8-
; A8-NEXT: mov r2, #1
8+
; A8-NEXT: add r2, r1, #1
99
; A8-NEXT: cmp r0, #0
10-
; A8-NEXT: mvneq r2, #0
11-
; A8-NEXT: add r0, r1, r2
10+
; A8-NEXT: subeq r2, r1, #1
11+
; A8-NEXT: mov r0, r2
1212
; A8-NEXT: bx lr
1313
;
1414
; SWIFT-LABEL: t1:
1515
; SWIFT: @ %bb.0: @ %common.ret
16-
; SWIFT-NEXT: mov r2, #1
16+
; SWIFT-NEXT: add r2, r1, #1
1717
; SWIFT-NEXT: cmp r0, #0
18-
; SWIFT-NEXT: mvneq r2, #0
19-
; SWIFT-NEXT: add r0, r1, r2
18+
; SWIFT-NEXT: subeq r2, r1, #1
19+
; SWIFT-NEXT: mov r0, r2
2020
; SWIFT-NEXT: bx lr
2121
%tmp2 = icmp eq i32 %a, 0
2222
br i1 %tmp2, label %cond_false, label %cond_true

0 commit comments

Comments
 (0)