Skip to content

Commit 69ed5fd

Browse files
gbaraldimaleadt
authored and committed
Remove llvm-muladd pass and move its functionality to llvm-simdloop (#55802)
Provides no-op fallbacks for compatibility. Co-authored-by: Tim Besard <[email protected]>
1 parent addff45 commit 69ed5fd

File tree

10 files changed

+96
-201
lines changed

10 files changed

+96
-201
lines changed

doc/src/devdocs/llvm-passes.md

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -114,18 +114,6 @@ This pass is used to verify Julia's invariants about LLVM IR. This includes thin
114114

115115
These passes are used to perform transformations on LLVM IR that LLVM will not perform itself, e.g. fast math flag propagation, escape analysis, and optimizations on Julia-specific internal functions. They use knowledge about Julia's semantics to perform these optimizations.
116116

117-
### CombineMulAdd
118-
119-
* Filename: `llvm-muladd.cpp`
120-
* Class Name: `CombineMulAddPass`
121-
* Opt Name: `function(CombineMulAdd)`
122-
123-
This pass serves to optimize the particular combination of a regular `fmul` with a fast `fadd` into a contract `fmul` with a fast `fadd`. This is later optimized by the backend to a [fused multiply-add](https://en.wikipedia.org/wiki/Multiply%E2%80%93accumulate_operation#Fused_multiply%E2%80%93add) instruction, which can provide significantly faster operations at the cost of more [unpredictable semantics](https://simonbyrne.github.io/notes/fastmath/).
124-
125-
!!! note
126-
127-
This optimization only occurs when the `fmul` has a single use, which is the fast `fadd`.
128-
129117
### AllocOpt
130118

131119
* Filename: `llvm-alloc-opt.cpp`

doc/src/devdocs/llvm.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ The code for lowering Julia AST to LLVM IR or interpreting it directly is in dir
3030
| `llvm-julia-licm.cpp` | Custom LLVM pass to hoist/sink Julia-specific intrinsics |
3131
| `llvm-late-gc-lowering.cpp` | Custom LLVM pass to root GC-tracked values |
3232
| `llvm-lower-handlers.cpp` | Custom LLVM pass to lower try-catch blocks |
33-
| `llvm-muladd.cpp` | Custom LLVM pass for fast-match FMA |
3433
| `llvm-multiversioning.cpp` | Custom LLVM pass to generate sysimg code on multiple architectures |
3534
| `llvm-propagate-addrspaces.cpp` | Custom LLVM pass to canonicalize addrspaces |
3635
| `llvm-ptls.cpp` | Custom LLVM pass to lower TLS operations |

src/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ RT_LLVMLINK :=
5252
CG_LLVMLINK :=
5353

5454
ifeq ($(JULIACODEGEN),LLVM)
55-
CODEGEN_SRCS := codegen jitlayers aotcompile debuginfo disasm llvm-simdloop llvm-muladd \
55+
CODEGEN_SRCS := codegen jitlayers aotcompile debuginfo disasm llvm-simdloop \
5656
llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering llvm-ptls \
5757
llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces \
5858
llvm-multiversioning llvm-alloc-opt llvm-alloc-helpers cgmemmgr llvm-remove-addrspaces \

src/llvm-muladd.cpp

Lines changed: 0 additions & 117 deletions
This file was deleted.

src/llvm-simdloop.cpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ STATISTIC(ReductionChainLength, "Total sum of instructions folded from reduction
4141
STATISTIC(MaxChainLength, "Max length of reduction chain");
4242
STATISTIC(AddChains, "Addition reduction chains");
4343
STATISTIC(MulChains, "Multiply reduction chains");
44+
STATISTIC(TotalContracted, "Total number of multiplies marked for FMA");
4445

4546
#ifndef __clang_gcanalyzer__
4647
#define REMARK(remark) ORE.emit(remark)
@@ -49,6 +50,49 @@ STATISTIC(MulChains, "Multiply reduction chains");
4950
#endif
5051
namespace {
5152

53+
/**
54+
* Combine
55+
* ```
56+
* %v0 = fmul ... %a, %b
57+
* %v = fadd contract ... %v0, %c
58+
* ```
59+
* to
60+
* %v0 = fmul contract ... %a, %b
61+
* %v = fadd contract ... %v0, %c
62+
* when `%v0` has no other use
63+
*/
64+
65+
static bool checkCombine(Value *maybeMul, Loop &L, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT
66+
{
67+
auto mulOp = dyn_cast<Instruction>(maybeMul);
68+
if (!mulOp || mulOp->getOpcode() != Instruction::FMul)
69+
return false;
70+
if (!L.contains(mulOp))
71+
return false;
72+
if (!mulOp->hasOneUse()) {
73+
LLVM_DEBUG(dbgs() << "mulOp has multiple uses: " << *maybeMul << "\n");
74+
REMARK([&](){
75+
return OptimizationRemarkMissed(DEBUG_TYPE, "Multiuse FMul", mulOp)
76+
<< "fmul had multiple uses " << ore::NV("fmul", mulOp);
77+
});
78+
return false;
79+
}
80+
// On 5.0+ we only need to mark the mulOp as contract and the backend will do the work for us.
81+
auto fmf = mulOp->getFastMathFlags();
82+
if (!fmf.allowContract()) {
83+
LLVM_DEBUG(dbgs() << "Marking mulOp for FMA: " << *maybeMul << "\n");
84+
REMARK([&](){
85+
return OptimizationRemark(DEBUG_TYPE, "Marked for FMA", mulOp)
86+
<< "marked for fma " << ore::NV("fmul", mulOp);
87+
});
88+
++TotalContracted;
89+
fmf.setAllowContract(true);
90+
mulOp->copyFastMathFlags(fmf);
91+
return true;
92+
}
93+
return false;
94+
}
95+
5296
static unsigned getReduceOpcode(Instruction *J, Instruction *operand) JL_NOTSAFEPOINT
5397
{
5498
switch (J->getOpcode()) {
@@ -150,6 +194,28 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop &L, OptimizationRe
150194
});
151195
(*K)->setHasAllowReassoc(true);
152196
(*K)->setHasAllowContract(true);
197+
switch ((*K)->getOpcode()) {
198+
case Instruction::FAdd: {
199+
if (!(*K)->hasAllowContract())
200+
continue;
201+
// (*K)->getOperand(0)->print(dbgs());
202+
// (*K)->getOperand(1)->print(dbgs());
203+
checkCombine((*K)->getOperand(0), L, ORE);
204+
checkCombine((*K)->getOperand(1), L, ORE);
205+
break;
206+
}
207+
case Instruction::FSub: {
208+
if (!(*K)->hasAllowContract())
209+
continue;
210+
// (*K)->getOperand(0)->print(dbgs());
211+
// (*K)->getOperand(1)->print(dbgs());
212+
checkCombine((*K)->getOperand(0), L, ORE);
213+
checkCombine((*K)->getOperand(1), L, ORE);
214+
break;
215+
}
216+
default:
217+
break;
218+
}
153219
if (SE)
154220
SE->forgetValue(*K);
155221
++length;

src/passes.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,18 @@ struct DemoteFloat16Pass : PassInfoMixin<DemoteFloat16Pass> {
1515
static bool isRequired() { return true; }
1616
};
1717

18-
struct CombineMulAddPass : PassInfoMixin<CombineMulAddPass> {
19-
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT;
20-
};
21-
2218
struct LateLowerGCPass : PassInfoMixin<LateLowerGCPass> {
2319
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT;
2420
static bool isRequired() { return true; }
2521
};
2622

23+
// Compatibility stub: this pass performs no transformation and reports every
// analysis as preserved. It exists so that code which still names
// CombineMulAddPass keeps compiling.
struct CombineMulAddPass : PassInfoMixin<CombineMulAddPass> {
    PreservedAnalyses run(Function &, FunctionAnalysisManager &) JL_NOTSAFEPOINT
    {
        return PreservedAnalyses::all();
    }
};
29+
2730
struct AllocOptPass : PassInfoMixin<AllocOptPass> {
2831
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT;
2932
};

src/pipeline.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,6 @@ static void buildCleanupPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimi
577577
if (options.cleanup) {
578578
if (O.getSpeedupLevel() >= 2) {
579579
FunctionPassManager FPM;
580-
JULIA_PASS(FPM.addPass(CombineMulAddPass()));
581580
FPM.addPass(DivRemPairsPass());
582581
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
583582
}

test/llvmpasses/julia-simdloop.ll

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,26 @@ loopdone:
6161
ret double %nextv
6262
}
6363

64+
; COM: Subtractive reduction over %v in a loop tagged with !llvm.loop !0.
; COM: The subtrahend %nextv2 is an fmul whose only use is the fsub, so the
; COM: test expects the fmul to gain `contract` and the fsub to gain
; COM: `reassoc contract`.
; CHECK-LABEL: @simd_test_sub4(
65+
define double @simd_test_sub4(double *%a) {
66+
top:
67+
br label %loop
68+
loop:
69+
%i = phi i64 [0, %top], [%nexti, %loop]
70+
%v = phi double [0.000000e+00, %top], [%nextv, %loop]
71+
%aptr = getelementptr double, double *%a, i64 %i
72+
%aval = load double, double *%aptr
73+
%nextv2 = fmul double %aval, %aval
74+
; CHECK: fmul contract double %aval, %aval
75+
%nextv = fsub double %v, %nextv2
76+
; CHECK: fsub reassoc contract double %v, %nextv2
77+
%nexti = add i64 %i, 1
78+
%done = icmp sgt i64 %nexti, 500
79+
br i1 %done, label %loopdone, label %loop, !llvm.loop !0
80+
loopdone:
81+
ret double %nextv
82+
}
83+
6484
; Tests if we correctly pass through other metadata
6585
; CHECK-LABEL: @disabled(
6686
define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) {
@@ -84,6 +104,7 @@ for.end: ; preds = %for.body
84104
ret i32 %1
85105
}
86106

107+
87108
!0 = distinct !{!0, !"julia.simdloop"}
88109
!1 = distinct !{!1, !"julia.simdloop", !"julia.ivdep"}
89110
!2 = distinct !{!2, !"julia.simdloop", !"julia.ivdep", !3}

test/llvmpasses/muladd.ll

Lines changed: 0 additions & 64 deletions
This file was deleted.

test/llvmpasses/parsing.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; COM: NewPM-only test, tests for ability to parse Julia passes
22

3-
; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='module(CPUFeatures,RemoveNI,JuliaMultiVersioning,RemoveJuliaAddrspaces,LowerPTLSPass,function(DemoteFloat16,CombineMulAdd,LateLowerGCFrame,FinalLowerGC,AllocOpt,PropagateJuliaAddrspaces,LowerExcHandlers,GCInvariantVerifier,loop(LowerSIMDLoop,JuliaLICM),GCInvariantVerifier<strong>,GCInvariantVerifier<no-strong>),LowerPTLSPass<imaging>,LowerPTLSPass<no-imaging>,JuliaMultiVersioning<external>,JuliaMultiVersioning<no-external>)' -S %s -o /dev/null
3+
; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='module(CPUFeatures,RemoveNI,JuliaMultiVersioning,RemoveJuliaAddrspaces,LowerPTLSPass,function(DemoteFloat16,LateLowerGCFrame,FinalLowerGC,AllocOpt,PropagateJuliaAddrspaces,LowerExcHandlers,GCInvariantVerifier,loop(LowerSIMDLoop,JuliaLICM),GCInvariantVerifier<strong>,GCInvariantVerifier<no-strong>),LowerPTLSPass<imaging>,LowerPTLSPass<no-imaging>,JuliaMultiVersioning<external>,JuliaMultiVersioning<no-external>)' -S %s -o /dev/null
44
; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes="julia<level=3;llvm_only>" -S %s -o /dev/null
55
; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes="julia<level=3;no_llvm_only>" -S %s -o /dev/null
66
; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes="julia<level=3;no_enable_vector_pipeline>" -S %s -o /dev/null

0 commit comments

Comments
 (0)