Skip to content

Commit ea26451

Browse files
committed
[SDAG][AMDGPU] Allow opting in to OOB-generating PTRADD transforms
This PR adds a TargetLowering hook, canTransformPtrArithOutOfBounds, that targets can use to allow transformations to introduce out-of-bounds pointer arithmetic. It also moves two such transformations from the AMDGPU-specific DAG combines to the generic DAGCombiner. This is motivated by target features like AArch64's checked pointer arithmetic, CPA, which does not tolerate the introduction of out-of-bounds pointer arithmetic.
1 parent 812e4fa commit ea26451

File tree

4 files changed

+94
-100
lines changed

4 files changed

+94
-100
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3504,6 +3504,13 @@ class LLVM_ABI TargetLoweringBase {
35043504
return false;
35053505
}
35063506

3507+
/// True if the target allows transformations of in-bounds pointer
3508+
/// arithmetic that cause out-of-bounds intermediate results.
3509+
virtual bool canTransformPtrArithOutOfBounds(const Function &F,
3510+
EVT PtrVT) const {
3511+
return false;
3512+
}
3513+
35073514
/// Does this target support complex deinterleaving
35083515
virtual bool isComplexDeinterleavingSupported() const { return false; }
35093516

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 74 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -2688,59 +2688,82 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) {
26882688
if (PtrVT == IntVT && isNullConstant(N0))
26892689
return N1;
26902690

2691-
if (N0.getOpcode() != ISD::PTRADD ||
2692-
reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1))
2693-
return SDValue();
2694-
2695-
SDValue X = N0.getOperand(0);
2696-
SDValue Y = N0.getOperand(1);
2697-
SDValue Z = N1;
2698-
bool N0OneUse = N0.hasOneUse();
2699-
bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2700-
bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2701-
2702-
// (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2703-
// * y is a constant and (ptradd x, y) has one use; or
2704-
// * y and z are both constants.
2705-
if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2706-
// If both additions in the original were NUW, the new ones are as well.
2707-
SDNodeFlags Flags =
2708-
(N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2709-
SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2710-
AddToWorklist(Add.getNode());
2711-
return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2691+
if (N0.getOpcode() == ISD::PTRADD &&
2692+
!reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) {
2693+
SDValue X = N0.getOperand(0);
2694+
SDValue Y = N0.getOperand(1);
2695+
SDValue Z = N1;
2696+
bool N0OneUse = N0.hasOneUse();
2697+
bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2698+
bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2699+
2700+
// (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2701+
// * y is a constant and (ptradd x, y) has one use; or
2702+
// * y and z are both constants.
2703+
if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2704+
// If both additions in the original were NUW, the new ones are as well.
2705+
SDNodeFlags Flags =
2706+
(N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2707+
SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2708+
AddToWorklist(Add.getNode());
2709+
return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2710+
}
2711+
}
2712+
2713+
// The following combines can turn in-bounds pointer arithmetic out of bounds.
2714+
// That is problematic for settings like AArch64's CPA, which checks that
2715+
// intermediate results of pointer arithmetic remain in bounds. The target
2716+
// therefore needs to opt-in to enable them.
2717+
if (!TLI.canTransformPtrArithOutOfBounds(
2718+
DAG.getMachineFunction().getFunction(), PtrVT))
2719+
return SDValue();
2720+
2721+
if (N0.getOpcode() == ISD::PTRADD && N1.getOpcode() == ISD::Constant) {
2722+
// Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with
2723+
// global address GA and constant c, such that c can be folded into GA.
2724+
SDValue GAValue = N0.getOperand(0);
2725+
if (const GlobalAddressSDNode *GA =
2726+
dyn_cast<GlobalAddressSDNode>(GAValue)) {
2727+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2728+
if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2729+
// If both additions in the original were NUW, reassociation preserves
2730+
// that.
2731+
SDNodeFlags Flags =
2732+
(N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2733+
SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
2734+
AddToWorklist(Inner.getNode());
2735+
return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
2736+
}
2737+
}
27122738
}
27132739

2714-
// TODO: There is another possible fold here that was proven useful.
2715-
// It would be this:
2716-
//
2717-
// (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
2718-
// * (ptradd x, y) has one use; and
2719-
// * y is a constant; and
2720-
// * z is not a constant.
2721-
//
2722-
// In some cases, specifically in AArch64's FEAT_CPA, it exposes the
2723-
// opportunity to select more complex instructions such as SUBPT and
2724-
// MSUBPT. However, a hypothetical corner case has been found that we could
2725-
// not avoid. Consider this (pseudo-POSIX C):
2726-
//
2727-
// char *foo(char *x, int z) {return (x + LARGE_CONSTANT) + z;}
2728-
// char *p = mmap(LARGE_CONSTANT);
2729-
// char *q = foo(p, -LARGE_CONSTANT);
2730-
//
2731-
// Then x + LARGE_CONSTANT is one-past-the-end, so valid, and a
2732-
// further + z takes it back to the start of the mapping, so valid,
2733-
// regardless of the address mmap gave back. However, if mmap gives you an
2734-
// address < LARGE_CONSTANT (ignoring high bits), x - LARGE_CONSTANT will
2735-
// borrow from the high bits (with the subsequent + z carrying back into
2736-
// the high bits to give you a well-defined pointer) and thus trip
2737-
// FEAT_CPA's pointer corruption checks.
2738-
//
2739-
// We leave this fold as an opportunity for future work, addressing the
2740-
// corner case for FEAT_CPA, as well as reconciling the solution with the
2741-
// more general application of pointer arithmetic in other future targets.
2742-
// For now each architecture that wants this fold must implement it in the
2743-
// target-specific code (see e.g. SITargetLowering::performPtrAddCombine)
2740+
if (N1.getOpcode() == ISD::ADD && N1.hasOneUse()) {
2741+
// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
2742+
// y is not, and (add y, z) is used only once.
2743+
// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
2744+
// z is not, and (add y, z) is used only once.
2745+
// The goal is to move constant offsets to the outermost ptradd, to create
2746+
// more opportunities to fold offsets into memory instructions.
2747+
// Together with the another combine above, this also implements
2748+
// (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
2749+
SDValue X = N0;
2750+
SDValue Y = N1.getOperand(0);
2751+
SDValue Z = N1.getOperand(1);
2752+
bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2753+
bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2754+
2755+
// If both additions in the original were NUW, reassociation preserves that.
2756+
SDNodeFlags ReassocFlags =
2757+
(N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2758+
2759+
if (ZIsConstant != YIsConstant) {
2760+
if (YIsConstant)
2761+
std::swap(Y, Z);
2762+
SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
2763+
AddToWorklist(Inner.getNode());
2764+
return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
2765+
}
2766+
}
27442767

27452768
return SDValue();
27462769
}

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 10 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -10688,6 +10688,11 @@ bool SITargetLowering::shouldPreservePtrArith(const Function &F,
1068810688
return UseSelectionDAGPTRADD && PtrVT == MVT::i64;
1068910689
}
1069010690

10691+
bool SITargetLowering::canTransformPtrArithOutOfBounds(const Function &F,
10692+
EVT PtrVT) const {
10693+
return true;
10694+
}
10695+
1069110696
// The raw.(t)buffer and struct.(t)buffer intrinsics have two offset args:
1069210697
// offset (the offset that is included in bounds checking and swizzling, to be
1069310698
// split between the instruction's voffset and immoffset fields) and soffset
@@ -15231,64 +15236,17 @@ SDValue SITargetLowering::performPtrAddCombine(SDNode *N,
1523115236
return Folded;
1523215237
}
1523315238

15234-
if (N0.getOpcode() == ISD::PTRADD && N1.getOpcode() == ISD::Constant) {
15235-
// Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with
15236-
// global address GA and constant c, such that c can be folded into GA.
15237-
SDValue GAValue = N0.getOperand(0);
15238-
if (const GlobalAddressSDNode *GA =
15239-
dyn_cast<GlobalAddressSDNode>(GAValue)) {
15240-
if (DCI.isBeforeLegalizeOps() && isOffsetFoldingLegal(GA)) {
15241-
// If both additions in the original were NUW, reassociation preserves
15242-
// that.
15243-
SDNodeFlags Flags =
15244-
(N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
15245-
SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
15246-
DCI.AddToWorklist(Inner.getNode());
15247-
return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
15248-
}
15249-
}
15250-
}
15251-
1525215239
if (N1.getOpcode() != ISD::ADD || !N1.hasOneUse())
1525315240
return SDValue();
1525415241

15255-
// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
15256-
// y is not, and (add y, z) is used only once.
15257-
// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
15258-
// z is not, and (add y, z) is used only once.
15259-
// The goal is to move constant offsets to the outermost ptradd, to create
15260-
// more opportunities to fold offsets into memory instructions.
15261-
// Together with the generic combines in DAGCombiner.cpp, this also
15262-
// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
15263-
//
15264-
// This transform is here instead of in the general DAGCombiner as it can
15265-
// turn in-bounds pointer arithmetic out-of-bounds, which is problematic for
15266-
// AArch64's CPA.
1526715242
SDValue X = N0;
1526815243
SDValue Y = N1.getOperand(0);
1526915244
SDValue Z = N1.getOperand(1);
1527015245
bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
1527115246
bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
1527215247

15273-
// If both additions in the original were NUW, reassociation preserves that.
15274-
SDNodeFlags ReassocFlags =
15275-
(N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
15276-
15277-
if (ZIsConstant != YIsConstant) {
15278-
if (YIsConstant)
15279-
std::swap(Y, Z);
15280-
SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
15281-
DCI.AddToWorklist(Inner.getNode());
15282-
return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
15283-
}
15284-
15285-
// If one of Y and Z is constant, they have been handled above. If both were
15286-
// constant, the addition would have been folded in SelectionDAG::getNode
15287-
// already. This ensures that the generic DAG combines won't undo the
15288-
// following reassociation.
15289-
assert(!YIsConstant && !ZIsConstant);
15290-
15291-
if (!X->isDivergent() && Y->isDivergent() != Z->isDivergent()) {
15248+
if (!YIsConstant && !ZIsConstant && !X->isDivergent() &&
15249+
Y->isDivergent() != Z->isDivergent()) {
1529215250
// Reassociate (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if x and
1529315251
// y are uniform and z isn't.
1529415252
// Reassociate (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if x and
@@ -15299,6 +15257,9 @@ SDValue SITargetLowering::performPtrAddCombine(SDNode *N,
1529915257
// reassociate.
1530015258
if (Y->isDivergent())
1530115259
std::swap(Y, Z);
15260+
// If both additions in the original were NUW, reassociation preserves that.
15261+
SDNodeFlags ReassocFlags =
15262+
(N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
1530215263
SDValue UniformInner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
1530315264
DCI.AddToWorklist(UniformInner.getNode());
1530415265
return DAG.getMemBasePlusOffset(UniformInner, Z, DL, ReassocFlags);

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,9 @@ class SITargetLowering final : public AMDGPUTargetLowering {
264264

265265
bool shouldPreservePtrArith(const Function &F, EVT PtrVT) const override;
266266

267+
bool canTransformPtrArithOutOfBounds(const Function &F,
268+
EVT PtrVT) const override;
269+
267270
private:
268271
// Analyze a combined offset from an amdgcn_s_buffer_load intrinsic and store
269272
// the three offsets (voffset, soffset and instoffset) into the SDValue[3]

0 commit comments

Comments
 (0)