Skip to content

Commit 6789442

Browse files
authored
[AArch64] Fix a corner case with large stack allocation (#122038)
In the unlikely case where the stack size is greater than 4GB, we may run into the situation where the local stack size and the callee saved registers stack size get combined incorrectly when restoring the callee saved registers. This happens because the stack size in shouldCombineCSRLocalStackBumpInEpilogue is represented as an 'unsigned', but is passed in as an 'int64_t'. We end up with something like: $fp, $lr = frame-destroy LDPXi $sp, 536870912. This change just makes 'shouldCombineCSRLocalStackBumpInEpilogue' match 'shouldCombineCSRLocalStackBump', where 'StackBumpBytes' is a 'uint64_t'.
1 parent 30e517c commit 6789442

File tree

3 files changed

+48
-4
lines changed

3 files changed

+48
-4
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1195,10 +1195,9 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
11951195
}
11961196

11971197
bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
1198-
MachineBasicBlock &MBB, unsigned StackBumpBytes) const {
1198+
MachineBasicBlock &MBB, uint64_t StackBumpBytes) const {
11991199
if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
12001200
return false;
1201-
12021201
if (MBB.empty())
12031202
return true;
12041203

@@ -2363,7 +2362,6 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
23632362
}
23642363
bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
23652364
// Assume we can't combine the last pop with the sp restore.
2366-
23672365
bool CombineAfterCSRBump = false;
23682366
if (!CombineSPBump && PrologueSaveSize != 0) {
23692367
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());

llvm/lib/Target/AArch64/AArch64FrameLowering.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ class AArch64FrameLowering : public TargetFrameLowering {
146146
int &MinCSFrameIndex,
147147
int &MaxCSFrameIndex) const;
148148
bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
149-
unsigned StackBumpBytes) const;
149+
uint64_t StackBumpBytes) const;
150150
void emitCalleeSavedGPRLocations(MachineBasicBlock &MBB,
151151
MachineBasicBlock::iterator MBBI) const;
152152
void emitCalleeSavedSVELocations(MachineBasicBlock &MBB,
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# RUN: llc -mtriple=aarch64 -run-pass=prologepilog %s -o - | FileCheck %s
2+
--- |
3+
define i32 @_Z4funcv() {
4+
entry:
5+
%array = alloca [1073741824 x i32], align 4
6+
%arrayidx = getelementptr inbounds [1073741824 x i32], ptr %array, i64 0, i64 20
7+
store i32 7, ptr %arrayidx, align 4
8+
call void @_Z5func2v()
9+
%arrayidx1 = getelementptr inbounds [1073741824 x i32], ptr %array, i64 0, i64 20
10+
%0 = load i32, ptr %arrayidx1, align 4
11+
ret i32 %0
12+
}
13+
14+
declare void @_Z5func2v()
15+
...
16+
---
17+
name: _Z4funcv
18+
alignment: 4
19+
legalized: true
20+
regBankSelected: true
21+
selected: true
22+
tracksRegLiveness: true
23+
noPhis: true
24+
isSSA: false
25+
noVRegs: true
26+
hasFakeUses: false
27+
frameInfo:
28+
maxAlignment: 4
29+
adjustsStack: true
30+
hasCalls: true
31+
maxCallFrameSize: 0
32+
stack:
33+
- { id: 0, name: array, size: 4294967296, alignment: 4, local-offset: -4294967296 }
34+
machineFunctionInfo: {}
35+
body: |
36+
bb.1.entry:
37+
renamable $w8 = MOVi32imm 7
38+
STRWui killed renamable $w8, %stack.0.array, 20 :: (store (s32) into %ir.arrayidx)
39+
ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
40+
BL @_Z5func2v, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp
41+
ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
42+
renamable $w0 = LDRWui %stack.0.array, 20 :: (dereferenceable load (s32) from %ir.arrayidx1)
43+
; CHECK: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2
44+
RET_ReallyLR implicit killed $w0
45+
46+
...

0 commit comments

Comments
 (0)