Skip to content

Commit 65f99b5

Browse files
committed
[AArch64][GlobalISel] Fixup <32b heterogeneous regbanks of G_PHIs just before selection.
Since all types <32b on gpr end up being assigned gpr32 regclasses, we can end up with PHIs here which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't be selecting heterogeneous regbanks for operands if possible, but we still need to be able to deal with it here. To fix this, if we have a gpr-bank operand < 32b in size and at least one other operand is on the fpr bank, then we add cross-bank copies to homogenize the operand banks. For simplicity the bank that we choose to settle on is whatever bank the def operand has. For example: %endbb: %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2 => %bb2: ... %in2_copy:gpr(s16) = COPY %in2:fpr(s16) ... %endbb: %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2 Differential Revision: https://reviews.llvm.org/D75086
1 parent 16cabf2 commit 65f99b5

File tree

2 files changed

+204
-0
lines changed

2 files changed

+204
-0
lines changed

llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ class AArch64InstructionSelector : public InstructionSelector {
6464
ProduceNonFlagSettingCondBr =
6565
!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
6666
MFReturnAddr = Register();
67+
68+
processPHIs(MF);
6769
}
6870

6971
private:
@@ -78,6 +80,9 @@ class AArch64InstructionSelector : public InstructionSelector {
7880
// An early selection function that runs before the selectImpl() call.
7981
bool earlySelect(MachineInstr &I) const;
8082

83+
// Do some preprocessing of G_PHIs before we begin selection.
84+
void processPHIs(MachineFunction &MF);
85+
8186
bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
8287

8388
/// Eliminate same-sized cross-bank copies into stores before selectImpl().
@@ -5327,6 +5332,95 @@ bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
53275332
}
53285333
}
53295334

5335+
5336+
// Perform fixups on the given PHI instruction's operands to force them all
5337+
// to be the same as the destination regbank.
5338+
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
5339+
const AArch64RegisterBankInfo &RBI) {
5340+
assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
5341+
Register DstReg = MI.getOperand(0).getReg();
5342+
const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
5343+
assert(DstRB && "Expected PHI dst to have regbank assigned");
5344+
MachineIRBuilder MIB(MI);
5345+
5346+
// Go through each operand and ensure it has the same regbank.
5347+
for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
5348+
MachineOperand &MO = MI.getOperand(OpIdx);
5349+
if (!MO.isReg())
5350+
continue;
5351+
Register OpReg = MO.getReg();
5352+
const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
5353+
if (RB != DstRB) {
5354+
// Insert a cross-bank copy.
5355+
auto *OpDef = MRI.getVRegDef(OpReg);
5356+
const LLT &Ty = MRI.getType(OpReg);
5357+
MIB.setInsertPt(*OpDef->getParent(), std::next(OpDef->getIterator()));
5358+
auto Copy = MIB.buildCopy(Ty, OpReg);
5359+
MRI.setRegBank(Copy.getReg(0), *DstRB);
5360+
MO.setReg(Copy.getReg(0));
5361+
}
5362+
}
5363+
}
5364+
5365+
void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Collect every G_PHI first. The fixup below adds new instructions to the
  // function, so it must not run while the blocks are still being iterated.
  SmallVector<MachineInstr *, 32> Worklist;
  for (auto &MBB : MF)
    for (auto &Inst : MBB)
      if (Inst.getOpcode() == TargetOpcode::G_PHI)
        Worklist.emplace_back(&Inst);

  // Some work is needed when the incoming values are scalars narrower than
  // 32 bits and are split across the fpr and gpr banks. Every type <32b on
  // gpr ends up in a gpr32 regclass, so such a PHI would otherwise have to
  // select between a gpr32 and an fpr16. Ideally RBS would not pick
  // heterogeneous regbanks for the operands in the first place, but we still
  // have to cope with it here.
  //
  // The remedy: when a gpr-bank operand < 32b is seen together with at least
  // one operand on another bank, cross-bank copies are added so that all
  // operands agree. For simplicity the bank settled on is whichever bank the
  // def operand has. For example:
  //
  // %endbb:
  //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
  //  =>
  // %bb2:
  //   ...
  //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
  //   ...
  // %endbb:
  //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
  auto HasMixedBanks = [&MRI](const MachineInstr &Phi) {
    bool SawGPR = false;
    bool SawOther = false;
    for (unsigned Idx = 1; Idx < Phi.getNumOperands(); ++Idx) {
      const auto &Incoming = Phi.getOperand(Idx);
      // Skip the basic-block operands that sit between the incoming values.
      if (!Incoming.isReg())
        continue;

      // Only valid scalars narrower than 32 bits are of interest; stop
      // scanning at the first operand that is anything else.
      const LLT &Ty = MRI.getType(Incoming.getReg());
      if (!Ty.isValid() || !Ty.isScalar() || Ty.getSizeInBits() >= 32)
        break;

      // If for some reason there is no regbank yet, don't try anything more.
      const RegisterBank *Bank = MRI.getRegBankOrNull(Incoming.getReg());
      if (!Bank)
        break;

      const bool IsGPR = Bank->getID() == AArch64::GPRRegBankID;
      SawGPR |= IsGPR;
      SawOther |= !IsGPR;
    }
    return SawGPR && SawOther;
  };

  // Heterogeneous regbanks on a PHI need fixing up.
  for (auto *Phi : Worklist)
    if (HasMixedBanks(*Phi))
      fixupPHIOpBanks(*Phi, MRI, RBI);
}
5423+
53305424
namespace llvm {
53315425
InstructionSelector *
53325426
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -verify-machineinstrs -mtriple aarch64--- -run-pass=instruction-select -global-isel %s -o - | FileCheck %s
3+
---
4+
name: test_loop_phi_fpr_to_gpr
5+
alignment: 4
6+
legalized: true
7+
regBankSelected: true
8+
selected: false
9+
failedISel: false
10+
tracksRegLiveness: true
11+
liveins: []
12+
machineFunctionInfo: {}
13+
body: |
14+
; CHECK-LABEL: name: test_loop_phi_fpr_to_gpr
15+
; CHECK: bb.0:
16+
; CHECK: successors: %bb.1(0x80000000)
17+
; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
18+
; CHECK: [[DEF1:%[0-9]+]]:gpr64common = IMPLICIT_DEF
19+
; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 2143289344
20+
; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
21+
; CHECK: bb.1:
22+
; CHECK: successors: %bb.2(0x80000000)
23+
; CHECK: [[DEF2:%[0-9]+]]:gpr32 = IMPLICIT_DEF
24+
; CHECK: $wzr = ANDSWri [[DEF]], 0, implicit-def $nzcv
25+
; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[DEF2]], [[DEF2]], 1, implicit $nzcv
26+
; CHECK: bb.2:
27+
; CHECK: successors: %bb.2(0x80000000)
28+
; CHECK: [[PHI:%[0-9]+]]:gpr32 = PHI [[CSELWr]], %bb.1, %8, %bb.2
29+
; CHECK: [[FCVTHSr:%[0-9]+]]:fpr16 = FCVTHSr [[COPY]]
30+
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[FCVTHSr]], %subreg.hsub
31+
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
32+
; CHECK: STRHHui [[PHI]], [[DEF1]], 0 :: (store 2 into `half* undef`)
33+
; CHECK: B %bb.2
34+
bb.0:
35+
successors: %bb.1(0x80000000)
36+
37+
%0:gpr(s1) = G_IMPLICIT_DEF
38+
%4:gpr(p0) = G_IMPLICIT_DEF
39+
%8:fpr(s32) = G_FCONSTANT float 0x7FF8000000000000
40+
41+
bb.1:
42+
successors: %bb.2(0x80000000)
43+
44+
%6:gpr(s32) = G_IMPLICIT_DEF
45+
%7:gpr(s32) = G_SELECT %0(s1), %6, %6
46+
%1:gpr(s16) = G_TRUNC %7(s32)
47+
48+
bb.2:
49+
successors: %bb.2(0x80000000)
50+
51+
%3:gpr(s16) = G_PHI %1(s16), %bb.1, %5(s16), %bb.2
52+
%5:fpr(s16) = G_FPTRUNC %8(s32)
53+
G_STORE %3(s16), %4(p0) :: (store 2 into `half* undef`)
54+
G_BR %bb.2
55+
56+
...
57+
---
58+
name: test_loop_phi_gpr_to_fpr
59+
alignment: 4
60+
legalized: true
61+
regBankSelected: true
62+
selected: false
63+
failedISel: false
64+
tracksRegLiveness: true
65+
liveins: []
66+
machineFunctionInfo: {}
67+
body: |
68+
; CHECK-LABEL: name: test_loop_phi_gpr_to_fpr
69+
; CHECK: bb.0:
70+
; CHECK: successors: %bb.1(0x80000000)
71+
; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
72+
; CHECK: [[DEF1:%[0-9]+]]:gpr64common = IMPLICIT_DEF
73+
; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 2143289344
74+
; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
75+
; CHECK: bb.1:
76+
; CHECK: successors: %bb.2(0x80000000)
77+
; CHECK: [[DEF2:%[0-9]+]]:gpr32 = IMPLICIT_DEF
78+
; CHECK: $wzr = ANDSWri [[DEF]], 0, implicit-def $nzcv
79+
; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[DEF2]], [[DEF2]], 1, implicit $nzcv
80+
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[CSELWr]]
81+
; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub
82+
; CHECK: bb.2:
83+
; CHECK: successors: %bb.2(0x80000000)
84+
; CHECK: [[PHI:%[0-9]+]]:fpr16 = PHI %7, %bb.2, [[COPY2]], %bb.1
85+
; CHECK: [[FCVTHSr:%[0-9]+]]:fpr16 = FCVTHSr [[COPY]]
86+
; CHECK: STRHui [[PHI]], [[DEF1]], 0 :: (store 2 into `half* undef`)
87+
; CHECK: B %bb.2
88+
bb.0:
89+
successors: %bb.1(0x80000000)
90+
91+
%0:gpr(s1) = G_IMPLICIT_DEF
92+
%4:gpr(p0) = G_IMPLICIT_DEF
93+
%8:fpr(s32) = G_FCONSTANT float 0x7FF8000000000000
94+
95+
bb.1:
96+
successors: %bb.2(0x80000000)
97+
98+
%6:gpr(s32) = G_IMPLICIT_DEF
99+
%7:gpr(s32) = G_SELECT %0(s1), %6, %6
100+
%1:gpr(s16) = G_TRUNC %7(s32)
101+
102+
bb.2:
103+
successors: %bb.2(0x80000000)
104+
105+
%3:fpr(s16) = G_PHI %5(s16), %bb.2, %1(s16), %bb.1
106+
%5:fpr(s16) = G_FPTRUNC %8(s32)
107+
G_STORE %3(s16), %4(p0) :: (store 2 into `half* undef`)
108+
G_BR %bb.2
109+
110+
...

0 commit comments

Comments
 (0)