22
22
#include " MCTargetDesc/AMDGPUMCTargetDesc.h"
23
23
#include " SIMachineFunctionInfo.h"
24
24
#include " llvm/CodeGen/LiveIntervals.h"
25
+ #include " llvm/CodeGen/MachineDominators.h"
25
26
#include " llvm/CodeGen/MachineFrameInfo.h"
26
27
#include " llvm/CodeGen/RegisterScavenging.h"
27
28
#include " llvm/InitializePasses.h"
@@ -40,6 +41,7 @@ class SILowerSGPRSpills : public MachineFunctionPass {
40
41
const SIInstrInfo *TII = nullptr ;
41
42
LiveIntervals *LIS = nullptr ;
42
43
SlotIndexes *Indexes = nullptr ;
44
+ MachineDominatorTree *MDT = nullptr ;
43
45
44
46
// Save and Restore blocks of the current function. Typically there is a
45
47
// single save block, unless Windows EH funclets are involved.
@@ -52,14 +54,25 @@ class SILowerSGPRSpills : public MachineFunctionPass {
52
54
SILowerSGPRSpills () : MachineFunctionPass(ID) {}
53
55
54
56
void calculateSaveRestoreBlocks (MachineFunction &MF);
55
- bool spillCalleeSavedRegs (MachineFunction &MF);
57
+ bool spillCalleeSavedRegs (MachineFunction &MF,
58
+ SmallVectorImpl<int > &CalleeSavedFIs);
59
+ void updateLaneVGPRDomInstr (
60
+ int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt,
61
+ DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr);
56
62
57
63
bool runOnMachineFunction (MachineFunction &MF) override ;
58
64
59
65
// Declare the analyses this pass consumes. MachineDominatorTree is
// required so updateLaneVGPRDomInstr() can place each lane VGPR's
// IMPLICIT_DEF at a point dominating all of its spills; everything
// else is preserved.
void getAnalysisUsage(AnalysisUsage &AU) const override {
  AU.addRequired<MachineDominatorTree>();
  AU.setPreservesAll();
  MachineFunctionPass::getAnalysisUsage(AU);
}
70
+
71
+ MachineFunctionProperties getClearedProperties () const override {
72
+ return MachineFunctionProperties ()
73
+ .set (MachineFunctionProperties::Property::IsSSA)
74
+ .set (MachineFunctionProperties::Property::NoVRegs);
75
+ }
63
76
};
64
77
65
78
} // end anonymous namespace
@@ -70,6 +83,7 @@ INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE,
70
83
" SI lower SGPR spill instructions" , false , false )
71
84
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
72
85
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
86
+ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
73
87
INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE,
74
88
" SI lower SGPR spill instructions" , false , false )
75
89
@@ -175,7 +189,8 @@ static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) {
175
189
EntryBB.sortUniqueLiveIns ();
176
190
}
177
191
178
- bool SILowerSGPRSpills::spillCalleeSavedRegs (MachineFunction &MF) {
192
+ bool SILowerSGPRSpills::spillCalleeSavedRegs (
193
+ MachineFunction &MF, SmallVectorImpl<int > &CalleeSavedFIs) {
179
194
MachineRegisterInfo &MRI = MF.getRegInfo ();
180
195
const Function &F = MF.getFunction ();
181
196
const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
@@ -214,6 +229,7 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
214
229
TRI->getSpillAlign (*RC), true );
215
230
216
231
CSI.push_back (CalleeSavedInfo (Reg, JunkFI));
232
+ CalleeSavedFIs.push_back (JunkFI);
217
233
}
218
234
}
219
235
@@ -226,6 +242,7 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
226
242
int JunkFI = MFI.CreateStackObject (TRI->getSpillSize (*RC),
227
243
TRI->getSpillAlign (*RC), true );
228
244
CSI.push_back (CalleeSavedInfo (RetAddrReg, JunkFI));
245
+ CalleeSavedFIs.push_back (JunkFI);
229
246
}
230
247
231
248
if (!CSI.empty ()) {
@@ -245,20 +262,71 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
245
262
return false ;
246
263
}
247
264
265
+ void SILowerSGPRSpills::updateLaneVGPRDomInstr (
266
+ int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt,
267
+ DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr) {
268
+ // For the Def of a virtual LaneVPGR to dominate all its uses, we should
269
+ // insert an IMPLICIT_DEF before the dominating spill. Switching to a
270
+ // depth first order doesn't really help since the machine function can be in
271
+ // the unstructured control flow post-SSA. For each virtual register, hence
272
+ // finding the common dominator to get either the dominating spill or a block
273
+ // dominating all spills. Is there a better way to handle it?
274
+ SIMachineFunctionInfo *FuncInfo =
275
+ MBB->getParent ()->getInfo <SIMachineFunctionInfo>();
276
+ ArrayRef<SIRegisterInfo::SpilledReg> VGPRSpills =
277
+ FuncInfo->getSGPRSpillToVirtualVGPRLanes (FI);
278
+ Register PrevLaneVGPR;
279
+ for (auto &Spill : VGPRSpills) {
280
+ if (PrevLaneVGPR == Spill.VGPR )
281
+ continue ;
282
+
283
+ PrevLaneVGPR = Spill.VGPR ;
284
+ auto I = LaneVGPRDomInstr.find (Spill.VGPR );
285
+ if (Spill.Lane == 0 && I == LaneVGPRDomInstr.end ()) {
286
+ // Initially add the spill instruction itself for Insertion point.
287
+ LaneVGPRDomInstr[Spill.VGPR ] = InsertPt;
288
+ } else {
289
+ assert (I != LaneVGPRDomInstr.end ());
290
+ auto PrevInsertPt = I->second ;
291
+ MachineBasicBlock *DomMBB = PrevInsertPt->getParent ();
292
+ if (DomMBB == MBB) {
293
+ // The insertion point earlier selected in a predecessor block whose
294
+ // spills are currently being lowered. The earlier InsertPt would be
295
+ // the one just before the block terminator and it should be changed
296
+ // if we insert any new spill in it.
297
+ if (MDT->dominates (&*InsertPt, &*PrevInsertPt))
298
+ I->second = InsertPt;
299
+
300
+ continue ;
301
+ }
302
+
303
+ // Find the common dominator block between PrevInsertPt and the
304
+ // current spill.
305
+ DomMBB = MDT->findNearestCommonDominator (DomMBB, MBB);
306
+ if (DomMBB == MBB)
307
+ I->second = InsertPt;
308
+ else if (DomMBB != PrevInsertPt->getParent ())
309
+ I->second = &(*DomMBB->getFirstTerminator ());
310
+ }
311
+ }
312
+ }
313
+
248
314
bool SILowerSGPRSpills::runOnMachineFunction (MachineFunction &MF) {
249
315
const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
250
316
TII = ST.getInstrInfo ();
251
317
TRI = &TII->getRegisterInfo ();
252
318
253
319
LIS = getAnalysisIfAvailable<LiveIntervals>();
254
320
Indexes = getAnalysisIfAvailable<SlotIndexes>();
321
+ MDT = &getAnalysis<MachineDominatorTree>();
255
322
256
323
assert (SaveBlocks.empty () && RestoreBlocks.empty ());
257
324
258
325
// First, expose any CSR SGPR spills. This is mostly the same as what PEI
259
326
// does, but somewhat simpler.
260
327
calculateSaveRestoreBlocks (MF);
261
- bool HasCSRs = spillCalleeSavedRegs (MF);
328
+ SmallVector<int > CalleeSavedFIs;
329
+ bool HasCSRs = spillCalleeSavedRegs (MF, CalleeSavedFIs);
262
330
263
331
MachineFrameInfo &MFI = MF.getFrameInfo ();
264
332
MachineRegisterInfo &MRI = MF.getRegInfo ();
@@ -272,6 +340,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
272
340
273
341
bool MadeChange = false ;
274
342
bool NewReservedRegs = false ;
343
+ bool SpilledToVirtVGPRLanes = false ;
275
344
276
345
// TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
277
346
// handled as SpilledToReg in regular PrologEpilogInserter.
@@ -287,30 +356,69 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
287
356
// To track the spill frame indices handled in this pass.
288
357
BitVector SpillFIs (MFI.getObjectIndexEnd (), false );
289
358
359
+ // To track the IMPLICIT_DEF insertion point for the lane vgprs.
360
+ DenseMap<Register, MachineBasicBlock::iterator> LaneVGPRDomInstr;
361
+
290
362
for (MachineBasicBlock &MBB : MF) {
291
363
for (MachineInstr &MI : llvm::make_early_inc_range (MBB)) {
292
364
if (!TII->isSGPRSpill (MI))
293
365
continue ;
294
366
295
367
int FI = TII->getNamedOperand (MI, AMDGPU::OpName::addr)->getIndex ();
296
368
assert (MFI.getStackID (FI) == TargetStackID::SGPRSpill);
297
- if (FuncInfo->allocateSGPRSpillToVGPRLane (MF, FI)) {
298
- NewReservedRegs = true ;
299
- bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex (
300
- MI, FI, nullptr , Indexes, LIS);
301
- (void )Spilled;
302
- assert (Spilled && " failed to spill SGPR to VGPR when allocated" );
303
- SpillFIs.set (FI);
369
+
370
+ bool IsCalleeSaveSGPRSpill =
371
+ std::find (CalleeSavedFIs.begin (), CalleeSavedFIs.end (), FI) !=
372
+ CalleeSavedFIs.end ();
373
+ if (IsCalleeSaveSGPRSpill) {
374
+ // Spill callee-saved SGPRs into physical VGPR lanes.
375
+
376
+ // TODO: This is to ensure the CFIs are static for efficient frame
377
+ // unwinding in the debugger. Spilling them into virtual VGPR lanes
378
+ // involve regalloc to allocate the physical VGPRs and that might
379
+ // cause intermediate spill/split of such liveranges for successful
380
+ // allocation. This would result in broken CFI encoding unless the
381
+ // regalloc aware CFI generation to insert new CFIs along with the
382
+ // intermediate spills is implemented. There is no such support
383
+ // currently exist in the LLVM compiler.
384
+ if (FuncInfo->allocateSGPRSpillToVGPRLane (MF, FI, true )) {
385
+ NewReservedRegs = true ;
386
+ bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex (
387
+ MI, FI, nullptr , Indexes, LIS, true );
388
+ (void )Spilled;
389
+ assert (Spilled &&
390
+ " failed to spill SGPR to physical VGPR lane when allocated" );
391
+ }
392
+ } else {
393
+ MachineInstrSpan MIS (&MI, &MBB);
394
+ if (FuncInfo->allocateSGPRSpillToVGPRLane (MF, FI)) {
395
+ bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex (
396
+ MI, FI, nullptr , Indexes, LIS);
397
+ (void )Spilled;
398
+ assert (Spilled &&
399
+ " failed to spill SGPR to virtual VGPR lane when allocated" );
400
+ SpillFIs.set (FI);
401
+ updateLaneVGPRDomInstr (FI, &MBB, MIS.begin (), LaneVGPRDomInstr);
402
+ SpilledToVirtVGPRLanes = true ;
403
+ }
304
404
}
305
405
}
306
406
}
307
407
308
- // FIXME: Adding to live-ins redundant with reserving registers.
309
- for (MachineBasicBlock &MBB : MF) {
310
- for (auto Reg : FuncInfo->getSGPRSpillVGPRs ())
311
- MBB.addLiveIn (Reg);
312
- MBB.sortUniqueLiveIns ();
408
+ for (auto Reg : FuncInfo->getSGPRSpillVGPRs ()) {
409
+ auto InsertPt = LaneVGPRDomInstr[Reg];
410
+ // Insert the IMPLICIT_DEF at the identified points.
411
+ auto MIB =
412
+ BuildMI (*InsertPt->getParent (), *InsertPt, InsertPt->getDebugLoc (),
413
+ TII->get (AMDGPU::IMPLICIT_DEF), Reg);
414
+ FuncInfo->setFlag (Reg, AMDGPU::VirtRegFlag::WWM_REG);
415
+ if (LIS) {
416
+ LIS->InsertMachineInstrInMaps (*MIB);
417
+ LIS->createAndComputeVirtRegInterval (Reg);
418
+ }
419
+ }
313
420
421
+ for (MachineBasicBlock &MBB : MF) {
314
422
// FIXME: The dead frame indices are replaced with a null register from
315
423
// the debug value instructions. We should instead, update it with the
316
424
// correct register value. But not sure the register value alone is
@@ -338,6 +446,10 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
338
446
// lane".
339
447
FuncInfo->removeDeadFrameIndices (MF, /* ResetSGPRSpillStackIDs*/ false );
340
448
449
+ MadeChange = true ;
450
+ }
451
+
452
+ if (SpilledToVirtVGPRLanes) {
341
453
const TargetRegisterClass *RC =
342
454
ST.isWave32 () ? &AMDGPU::SGPR_32RegClass : &AMDGPU::SGPR_64RegClass;
343
455
// Shift back the reserved SGPR for EXEC copy into the lowest range.
@@ -347,18 +459,17 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
347
459
if (UnusedLowSGPR && TRI->getHWRegIndex (UnusedLowSGPR) <
348
460
TRI->getHWRegIndex (FuncInfo->getSGPRForEXECCopy ()))
349
461
FuncInfo->setSGPRForEXECCopy (UnusedLowSGPR);
350
-
351
- MadeChange = true ;
352
462
} else {
353
- // No SGPR spills and hence there won't be any WWM spills/copies. Reset the
354
- // SGPR reserved for EXEC copy.
463
+ // No SGPR spills to virtual VGPR lanes and hence there won't be any WWM
464
+ // spills/copies. Reset the SGPR reserved for EXEC copy.
355
465
FuncInfo->setSGPRForEXECCopy (AMDGPU::NoRegister);
356
466
}
357
467
358
468
SaveBlocks.clear ();
359
469
RestoreBlocks.clear ();
360
470
361
- // Updated the reserved registers with any VGPRs added for SGPR spills.
471
+ // Updated the reserved registers with any physical VGPRs added for SGPR
472
+ // spills.
362
473
if (NewReservedRegs)
363
474
MRI.freezeReservedRegs (MF);
364
475
0 commit comments