Skip to content

[AArch64] Fix a corner case with large stack allocation #122038

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1195,10 +1195,9 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
}

bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
- MachineBasicBlock &MBB, unsigned StackBumpBytes) const {
+ MachineBasicBlock &MBB, uint64_t StackBumpBytes) const {
if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
return false;

if (MBB.empty())
return true;

Expand Down Expand Up @@ -2363,7 +2362,6 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
}
bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
// Assume we can't combine the last pop with the sp restore.

bool CombineAfterCSRBump = false;
if (!CombineSPBump && PrologueSaveSize != 0) {
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64FrameLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ class AArch64FrameLowering : public TargetFrameLowering {
int &MinCSFrameIndex,
int &MaxCSFrameIndex) const;
bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
- unsigned StackBumpBytes) const;
+ uint64_t StackBumpBytes) const;
void emitCalleeSavedGPRLocations(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const;
void emitCalleeSavedSVELocations(MachineBasicBlock &MBB,
Expand Down
46 changes: 46 additions & 0 deletions llvm/test/CodeGen/AArch64/aarch64-large-stack-spbump.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# RUN: llc -mtriple=aarch64 -run-pass=prologepilog %s -o - | FileCheck %s
# Runs only the prologepilog pass over the machine function below and checks
# the output for a frame-destroy LDPXpost. The function's single stack object
# is 4294967296 bytes (2^32), which does not fit in a 32-bit unsigned integer.
--- |
define i32 @_Z4funcv() {
entry:
; 1073741824 x i32 is 4294967296 bytes, matching the size of stack object 0 below.
%array = alloca [1073741824 x i32], align 4
%arrayidx = getelementptr inbounds [1073741824 x i32], ptr %array, i64 0, i64 20
store i32 7, ptr %arrayidx, align 4
call void @_Z5func2v()
%arrayidx1 = getelementptr inbounds [1073741824 x i32], ptr %array, i64 0, i64 20
%0 = load i32, ptr %arrayidx1, align 4
ret i32 %0
}

declare void @_Z5func2v()
...
---
name: _Z4funcv
alignment: 4
legalized: true
regBankSelected: true
selected: true
tracksRegLiveness: true
noPhis: true
isSSA: false
noVRegs: true
hasFakeUses: false
frameInfo:
maxAlignment: 4
adjustsStack: true
hasCalls: true
maxCallFrameSize: 0
stack:
# Object 0 backs %array: size 4294967296 == 2^32.
- { id: 0, name: array, size: 4294967296, alignment: 4, local-offset: -4294967296 }
machineFunctionInfo: {}
body: |
bb.1.entry:
; Store 7 to the array slot, call _Z5func2v, then reload the same slot into $w0.
renamable $w8 = MOVi32imm 7
STRWui killed renamable $w8, %stack.0.array, 20 :: (store (s32) into %ir.arrayidx)
ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
BL @_Z5func2v, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp
ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
renamable $w0 = LDRWui %stack.0.array, 20 :: (dereferenceable load (s32) from %ir.arrayidx1)
; The expected output restores $fp and $lr with a post-indexed load pair from
; $sp that carries the frame-destroy flag.
; NOTE(review): presumably this form only appears when the $sp bump for the
; locals is NOT folded into the callee-save restore - confirm against
; shouldCombineCSRLocalStackBumpInEpilogue.
; CHECK: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2
RET_ReallyLR implicit killed $w0

...
Loading