@@ -2689,59 +2689,82 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) {
2689
2689
if (PtrVT == IntVT && isNullConstant(N0))
2690
2690
return N1;
2691
2691
2692
- if (N0.getOpcode() != ISD::PTRADD ||
2693
- reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1))
2694
- return SDValue();
2695
-
2696
- SDValue X = N0.getOperand(0);
2697
- SDValue Y = N0.getOperand(1);
2698
- SDValue Z = N1;
2699
- bool N0OneUse = N0.hasOneUse();
2700
- bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2701
- bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2702
-
2703
- // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2704
- // * y is a constant and (ptradd x, y) has one use; or
2705
- // * y and z are both constants.
2706
- if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2707
- // If both additions in the original were NUW, the new ones are as well.
2708
- SDNodeFlags Flags =
2709
- (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2710
- SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2711
- AddToWorklist(Add.getNode());
2712
- return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2692
+ if (N0.getOpcode() == ISD::PTRADD &&
2693
+ !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) {
2694
+ SDValue X = N0.getOperand(0);
2695
+ SDValue Y = N0.getOperand(1);
2696
+ SDValue Z = N1;
2697
+ bool N0OneUse = N0.hasOneUse();
2698
+ bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2699
+ bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2700
+
2701
+ // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2702
+ // * y is a constant and (ptradd x, y) has one use; or
2703
+ // * y and z are both constants.
2704
+ if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2705
+ // If both additions in the original were NUW, the new ones are as well.
2706
+ SDNodeFlags Flags =
2707
+ (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2708
+ SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2709
+ AddToWorklist(Add.getNode());
2710
+ return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2711
+ }
2712
+ }
2713
+
2714
+ // The following combines can turn in-bounds pointer arithmetic out of bounds.
2715
+ // That is problematic for settings like AArch64's CPA, which checks that
2716
+ // intermediate results of pointer arithmetic remain in bounds. The target
2717
+ // therefore needs to opt in to enable them.
2718
+ if (!TLI.canTransformPtrArithOutOfBounds(
2719
+ DAG.getMachineFunction().getFunction(), PtrVT))
2720
+ return SDValue();
2721
+
2722
+ if (N0.getOpcode() == ISD::PTRADD && N1.getOpcode() == ISD::Constant) {
2723
+ // Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c), v) with
2724
+ // global address GA and constant c, such that c can be folded into GA.
2725
+ SDValue GAValue = N0.getOperand(0);
2726
+ if (const GlobalAddressSDNode *GA =
2727
+ dyn_cast<GlobalAddressSDNode>(GAValue)) {
2728
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2729
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2730
+ // If both additions in the original were NUW, reassociation preserves
2731
+ // that.
2732
+ SDNodeFlags Flags =
2733
+ (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2734
+ SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
2735
+ AddToWorklist(Inner.getNode());
2736
+ return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
2737
+ }
2738
+ }
2713
2739
}
2714
2740
2715
- // TODO: There is another possible fold here that was proven useful.
2716
- // It would be this:
2717
- //
2718
- // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
2719
- // * (ptradd x, y) has one use; and
2720
- // * y is a constant; and
2721
- // * z is not a constant.
2722
- //
2723
- // In some cases, specifically in AArch64's FEAT_CPA, it exposes the
2724
- // opportunity to select more complex instructions such as SUBPT and
2725
- // MSUBPT. However, a hypothetical corner case has been found that we could
2726
- // not avoid. Consider this (pseudo-POSIX C):
2727
- //
2728
- // char *foo(char *x, int z) {return (x + LARGE_CONSTANT) + z;}
2729
- // char *p = mmap(LARGE_CONSTANT);
2730
- // char *q = foo(p, -LARGE_CONSTANT);
2731
- //
2732
- // Then x + LARGE_CONSTANT is one-past-the-end, so valid, and a
2733
- // further + z takes it back to the start of the mapping, so valid,
2734
- // regardless of the address mmap gave back. However, if mmap gives you an
2735
- // address < LARGE_CONSTANT (ignoring high bits), x - LARGE_CONSTANT will
2736
- // borrow from the high bits (with the subsequent + z carrying back into
2737
- // the high bits to give you a well-defined pointer) and thus trip
2738
- // FEAT_CPA's pointer corruption checks.
2739
- //
2740
- // We leave this fold as an opportunity for future work, addressing the
2741
- // corner case for FEAT_CPA, as well as reconciling the solution with the
2742
- // more general application of pointer arithmetic in other future targets.
2743
- // For now each architecture that wants this fold must implement it in the
2744
- // target-specific code (see e.g. SITargetLowering::performPtrAddCombine)
2741
+ if (N1.getOpcode() == ISD::ADD && N1.hasOneUse()) {
2742
+ // (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
2743
+ // y is not, and (add y, z) is used only once.
2744
+ // (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
2745
+ // z is not, and (add y, z) is used only once.
2746
+ // The goal is to move constant offsets to the outermost ptradd, to create
2747
+ // more opportunities to fold offsets into memory instructions.
2748
+ // Together with the other combine above, this also implements
2749
+ // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y).
2750
+ SDValue X = N0;
2751
+ SDValue Y = N1.getOperand(0);
2752
+ SDValue Z = N1.getOperand(1);
2753
+ bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2754
+ bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2755
+
2756
+ // If both additions in the original were NUW, reassociation preserves that.
2757
+ SDNodeFlags ReassocFlags =
2758
+ (N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
2759
+
2760
+ if (ZIsConstant != YIsConstant) {
2761
+ if (YIsConstant)
2762
+ std::swap(Y, Z);
2763
+ SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, ReassocFlags);
2764
+ AddToWorklist(Inner.getNode());
2765
+ return DAG.getMemBasePlusOffset(Inner, Z, DL, ReassocFlags);
2766
+ }
2767
+ }
2745
2768
2746
2769
return SDValue();
2747
2770
}
0 commit comments