diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 64dfb1e39485f..f5c33498c9d49 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1195,10 +1195,9 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( } bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue( - MachineBasicBlock &MBB, unsigned StackBumpBytes) const { + MachineBasicBlock &MBB, uint64_t StackBumpBytes) const { if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes)) return false; - if (MBB.empty()) return true; @@ -2363,7 +2362,6 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, } bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes); // Assume we can't combine the last pop with the sp restore. - bool CombineAfterCSRBump = false; if (!CombineSPBump && PrologueSaveSize != 0) { MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator()); diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index 20445e63bcb13..8f84702f4d2ba 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -146,7 +146,7 @@ class AArch64FrameLowering : public TargetFrameLowering { int &MinCSFrameIndex, int &MaxCSFrameIndex) const; bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB, - unsigned StackBumpBytes) const; + uint64_t StackBumpBytes) const; void emitCalleeSavedGPRLocations(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const; void emitCalleeSavedSVELocations(MachineBasicBlock &MBB, diff --git a/llvm/test/CodeGen/AArch64/aarch64-large-stack-spbump.mir b/llvm/test/CodeGen/AArch64/aarch64-large-stack-spbump.mir new file mode 100644 index 0000000000000..f920813f2b42d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-large-stack-spbump.mir @@ -0,0 +1,46 @@ +# RUN: llc -mtriple=aarch64 -run-pass=prologepilog %s -o - | FileCheck %s +--- | + define i32 @_Z4funcv() { + entry: + %array = alloca [1073741824 x i32], align 4 + %arrayidx = getelementptr inbounds [1073741824 x i32], ptr %array, i64 0, i64 20 + store i32 7, ptr %arrayidx, align 4 + call void @_Z5func2v() + %arrayidx1 = getelementptr inbounds [1073741824 x i32], ptr %array, i64 0, i64 20 + %0 = load i32, ptr %arrayidx1, align 4 + ret i32 %0 + } + + declare void @_Z5func2v() +... +--- +name: _Z4funcv +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +noPhis: true +isSSA: false +noVRegs: true +hasFakeUses: false +frameInfo: + maxAlignment: 4 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 +stack: + - { id: 0, name: array, size: 4294967296, alignment: 4, local-offset: -4294967296 } +machineFunctionInfo: {} +body: | + bb.1.entry: + renamable $w8 = MOVi32imm 7 + STRWui killed renamable $w8, %stack.0.array, 20 :: (store (s32) into %ir.arrayidx) + ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + BL @_Z5func2v, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp + ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + renamable $w0 = LDRWui %stack.0.array, 20 :: (dereferenceable load (s32) from %ir.arrayidx1) + ; CHECK: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 + RET_ReallyLR implicit killed $w0 + +...