Skip to content

[RISCV] Don't fold offsets into auipc if offset is larger than the reference global variable. #135297

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 26 additions & 9 deletions llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
bool detectFoldable(MachineInstr &Hi, MachineInstr *&Lo);

bool detectAndFoldOffset(MachineInstr &Hi, MachineInstr &Lo);
void foldOffset(MachineInstr &Hi, MachineInstr &Lo, MachineInstr &Tail,
bool foldOffset(MachineInstr &Hi, MachineInstr &Lo, MachineInstr &Tail,
int64_t Offset);
bool foldLargeOffset(MachineInstr &Hi, MachineInstr &Lo,
MachineInstr &TailAdd, Register GSReg);
Expand Down Expand Up @@ -142,9 +142,21 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
// Update the offset in Hi and Lo instructions.
// Delete the tail instruction and update all the uses to use the
// output from Lo.
void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &Hi, MachineInstr &Lo,
bool RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &Hi, MachineInstr &Lo,
MachineInstr &Tail, int64_t Offset) {
assert(isInt<32>(Offset) && "Unexpected offset");

// If Hi is an AUIPC, don't fold the offset if it is outside the bounds of
// the global object. The object may be within 2GB of the PC, but addresses
// outside of the object might not be.
if (Hi.getOpcode() == RISCV::AUIPC && Hi.getOperand(1).isGlobal()) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, actually, one minor question here: if Hi.getOperand(1).isGlobal() is false, what is the symbol? Do we need to just unconditionally refuse to fold?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will also try fold ConstantPool and BlockAddress offsets in this pass. Though I think the main reason for them to be in this pass is because this pass also folds the ADDI into load/store instructions.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably we don't want to fold offsets into blockaddress.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok. I'll commit this to fix the known issue and I'll add separate PR for block address.

const GlobalValue *GV = Hi.getOperand(1).getGlobal();
Type *Ty = GV->getValueType();
if (!Ty->isSized() || Offset < 0 ||
(uint64_t)Offset > GV->getDataLayout().getTypeAllocSize(Ty))
return false;
}

// Put the offset back in Hi and the Lo
Hi.getOperand(1).setOffset(Offset);
if (Hi.getOpcode() != RISCV::AUIPC)
Expand All @@ -156,6 +168,7 @@ void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &Hi, MachineInstr &Lo,
Tail.eraseFromParent();
LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n"
<< " " << Hi << " " << Lo;);
return true;
}

// Detect patterns for large offsets that are passed into an ADD instruction.
Expand Down Expand Up @@ -205,7 +218,8 @@ bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr &Hi,
// Handle rs1 of ADDI is X0.
if (AddiReg == RISCV::X0) {
LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail);
foldOffset(Hi, Lo, TailAdd, OffLo);
if (!foldOffset(Hi, Lo, TailAdd, OffLo))
return false;
OffsetTail.eraseFromParent();
return true;
}
Expand All @@ -226,7 +240,8 @@ bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr &Hi,
return false;
LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail
<< " " << OffsetLui);
foldOffset(Hi, Lo, TailAdd, Offset);
if (!foldOffset(Hi, Lo, TailAdd, Offset))
return false;
OffsetTail.eraseFromParent();
OffsetLui.eraseFromParent();
return true;
Expand All @@ -235,7 +250,8 @@ bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr &Hi,
// exists.
LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
int64_t Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12);
foldOffset(Hi, Lo, TailAdd, Offset);
if (!foldOffset(Hi, Lo, TailAdd, Offset))
return false;
OffsetTail.eraseFromParent();
return true;
}
Expand Down Expand Up @@ -294,7 +310,8 @@ bool RISCVMergeBaseOffsetOpt::foldShiftedOffset(MachineInstr &Hi,
Offset = (uint64_t)Offset << ShAmt;

LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
foldOffset(Hi, Lo, TailShXAdd, Offset);
if (!foldOffset(Hi, Lo, TailShXAdd, Offset))
return false;
OffsetTail.eraseFromParent();
return true;
}
Expand Down Expand Up @@ -327,15 +344,15 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi,
if (TailTail.getOpcode() == RISCV::ADDI) {
Offset += TailTail.getOperand(2).getImm();
LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail);
foldOffset(Hi, Lo, TailTail, Offset);
if (!foldOffset(Hi, Lo, TailTail, Offset))
return false;
Tail.eraseFromParent();
return true;
}
}

LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
foldOffset(Hi, Lo, Tail, Offset);
return true;
return foldOffset(Hi, Lo, Tail, Offset);
}
case RISCV::ADD:
// The offset is too large to fit in the immediate field of ADDI.
Expand Down
69 changes: 69 additions & 0 deletions llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1227,3 +1227,72 @@ for.inc.peel: ; preds = %entry
store i32 %spec.select, ptr null, align 4
ret i32 0
}

@ki_end = external dso_local global [0 x i8], align 1

define i1 @pr134525() nounwind {
; RV32I-LABEL: pr134525:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lui a0, %hi(ki_end)
; RV32I-NEXT: addi a0, a0, %lo(ki_end)
; RV32I-NEXT: lui a1, 523776
; RV32I-NEXT: lui a2, 32
; RV32I-NEXT: add a1, a0, a1
; RV32I-NEXT: addi a2, a2, 1
; RV32I-NEXT: sltu a2, a1, a2
; RV32I-NEXT: sltu a0, a1, a0
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: and a0, a0, a2
; RV32I-NEXT: ret
;
; RV32I-MEDIUM-LABEL: pr134525:
; RV32I-MEDIUM: # %bb.0: # %entry
; RV32I-MEDIUM-NEXT: .Lpcrel_hi15:
; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(ki_end)
; RV32I-MEDIUM-NEXT: lui a1, 523776
; RV32I-MEDIUM-NEXT: lui a2, 32
; RV32I-MEDIUM-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi15)
; RV32I-MEDIUM-NEXT: addi a2, a2, 1
; RV32I-MEDIUM-NEXT: add a1, a0, a1
; RV32I-MEDIUM-NEXT: sltu a2, a1, a2
; RV32I-MEDIUM-NEXT: sltu a0, a1, a0
; RV32I-MEDIUM-NEXT: not a0, a0
; RV32I-MEDIUM-NEXT: and a0, a0, a2
; RV32I-MEDIUM-NEXT: ret
;
; RV64I-LABEL: pr134525:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: lui a0, %hi(ki_end+2145386496)
; RV64I-NEXT: addi a0, a0, %lo(ki_end+2145386496)
; RV64I-NEXT: lui a1, 32
; RV64I-NEXT: addiw a1, a1, 1
; RV64I-NEXT: sltu a0, a0, a1
; RV64I-NEXT: ret
;
; RV64I-MEDIUM-LABEL: pr134525:
; RV64I-MEDIUM: # %bb.0: # %entry
; RV64I-MEDIUM-NEXT: .Lpcrel_hi15:
; RV64I-MEDIUM-NEXT: auipc a0, %pcrel_hi(ki_end)
; RV64I-MEDIUM-NEXT: lui a1, 523776
; RV64I-MEDIUM-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi15)
; RV64I-MEDIUM-NEXT: add a0, a0, a1
; RV64I-MEDIUM-NEXT: lui a1, 32
; RV64I-MEDIUM-NEXT: addiw a1, a1, 1
; RV64I-MEDIUM-NEXT: sltu a0, a0, a1
; RV64I-MEDIUM-NEXT: ret
;
; RV64I-LARGE-LABEL: pr134525:
; RV64I-LARGE: # %bb.0: # %entry
; RV64I-LARGE-NEXT: .Lpcrel_hi16:
; RV64I-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI22_0)
; RV64I-LARGE-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi16)(a0)
; RV64I-LARGE-NEXT: lui a1, 523776
; RV64I-LARGE-NEXT: add a0, a0, a1
; RV64I-LARGE-NEXT: lui a1, 32
; RV64I-LARGE-NEXT: addiw a1, a1, 1
; RV64I-LARGE-NEXT: sltu a0, a0, a1
; RV64I-LARGE-NEXT: ret
entry:
%cmp = icmp ult i64 sub (i64 ptrtoint (ptr @ki_end to i64), i64 -2145386496), 131073
ret i1 %cmp
}