diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index f113cd2ba2fbf..daef074e9dc72 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -905,6 +905,14 @@ def AlwaysInline : DeclOrStmtAttr { let Documentation = [AlwaysInlineDocs]; } +def AlwaysSpecialize : InheritableParamAttr { + let Spellings = [GNU<"always_specialize">, CXX11<"clang", "always_specialize">, + C23<"clang", "always_specialize">]; + let Subjects = SubjectList<[ParmVar]>; + let Documentation = [AlwaysSpecializeDocs]; + let SimpleHandler = 1; +} + def Artificial : InheritableAttr { let Spellings = [GCC<"artificial">]; let Subjects = SubjectList<[InlineFunction]>; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 047f51ffa59ed..64129a3107218 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -8126,6 +8126,17 @@ Attribute docs`_, and `the GCC Inline docs`_. let Heading = "always_inline, __force_inline"; } +def AlwaysSpecializeDocs : Documentation { + let Category = DocCatVariable; + let Content = [{ + The ``always_specialize`` attribute on a function parameter indicates that + the function shall be duplicated and specialized with respect to constant + arguments. This will usually increase code size. It controls an IR transform + similar in spirit to ``always_inline``. 
+ }]; + let Heading = "always_specialize"; +} + def EnforceTCBDocs : Documentation { let Category = DocCatFunction; let Content = [{ diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 16e49aab4fe61..4ba32986146ef 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2559,6 +2559,9 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, B.addAttribute("aarch64_new_zt0"); } + if (D->hasAttr()) + B.addAttribute(llvm::Attribute::AlwaysSpecialize); + // Track whether we need to add the optnone LLVM attribute, // starting with the default for this optimization level. bool ShouldAddOptNone = @@ -2978,6 +2981,12 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, F->addParamAttr(0, llvm::Attribute::Returned); } + for (auto [Index, Param] : enumerate(FD->parameters())) + if (Param->hasAttrs()) + for (auto *A : Param->getAttrs()) + if (A->getKind() == attr::AlwaysSpecialize) + F->addParamAttr(Index, llvm::Attribute::AlwaysSpecialize); + // Only a few attributes are set on declarations; these may later be // overridden by a definition. 
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 1aeae41042a1c..c32f147737883 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -7137,6 +7137,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_AlwaysInline: handleAlwaysInlineAttr(S, D, AL); break; + case ParsedAttr::AT_AlwaysSpecialize: + handleSimpleAttribute(S, D, AL); + break; case ParsedAttr::AT_AnalyzerNoReturn: handleAnalyzerNoReturnAttr(S, D, AL); break; diff --git a/clang/test/CodeGen/lto-newpm-pipeline.c b/clang/test/CodeGen/lto-newpm-pipeline.c index ea9784a76f923..7f83bd38e7803 100644 --- a/clang/test/CodeGen/lto-newpm-pipeline.c +++ b/clang/test/CodeGen/lto-newpm-pipeline.c @@ -31,6 +31,7 @@ // CHECK-FULL-O0-NEXT: Running pass: EntryExitInstrumenterPass // CHECK-FULL-O0-NEXT: Running pass: AlwaysInlinerPass // CHECK-FULL-O0-NEXT: Running analysis: ProfileSummaryAnalysis +// CHECK-FULL-O0-NEXT: Running pass: AlwaysSpecializerPass // CHECK-FULL-O0-NEXT: Running pass: CoroConditionalWrapper // CHECK-FULL-O0-NEXT: Running pass: CanonicalizeAliasesPass // CHECK-FULL-O0-NEXT: Running pass: NameAnonGlobalPass @@ -45,6 +46,7 @@ // CHECK-THIN-O0-NEXT: Running pass: EntryExitInstrumenterPass // CHECK-THIN-O0-NEXT: Running pass: AlwaysInlinerPass // CHECK-THIN-O0-NEXT: Running analysis: ProfileSummaryAnalysis +// CHECK-THIN-O0-NEXT: Running pass: AlwaysSpecializerPass // CHECK-THIN-O0-NEXT: Running pass: CoroConditionalWrapper // CHECK-THIN-O0-NEXT: Running pass: CanonicalizeAliasesPass // CHECK-THIN-O0-NEXT: Running pass: NameAnonGlobalPass diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index 41d00dae3f69a..cd90e06609c55 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -15,6 +15,7 @@ // CHECK-NEXT: 
AlignValue (SubjectMatchRule_variable, SubjectMatchRule_type_alias) // CHECK-NEXT: AlwaysDestroy (SubjectMatchRule_variable) // CHECK-NEXT: AlwaysInline (SubjectMatchRule_function) +// CHECK-NEXT: AlwaysSpecialize (SubjectMatchRule_variable_is_parameter) // CHECK-NEXT: Annotate () // CHECK-NEXT: AnyX86NoCfCheck (SubjectMatchRule_hasType_functionType) // CHECK-NEXT: ArcWeakrefUnavailable (SubjectMatchRule_objc_interface) diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index b362a88963f6c..476bb4167dea8 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -798,6 +798,7 @@ enum AttributeKindCodes { ATTR_KIND_NO_DIVERGENCE_SOURCE = 100, ATTR_KIND_SANITIZE_TYPE = 101, ATTR_KIND_CAPTURES = 102, + ATTR_KIND_ALWAYS_SPECIALIZE = 103, }; enum ComdatSelectionKindCodes { diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index d488c5f419b82..fb4c7366d9491 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -98,6 +98,9 @@ def AllocSize : IntAttr<"allocsize", IntersectPreserve, [FnAttr]>; /// inline=always. def AlwaysInline : EnumAttr<"alwaysinline", IntersectPreserve, [FnAttr]>; +/// Specialize function when argument at call site is known constant +def AlwaysSpecialize : EnumAttr<"alwaysspecialize", IntersectPreserve, [ParamAttr]>; + /// Callee is recognized as a builtin, despite nobuiltin attribute on its /// declaration. 
def Builtin : EnumAttr<"builtin", IntersectPreserve, [FnAttr]>; diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 42610d505c2bd..4c3f0ea08ed43 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -50,6 +50,7 @@ void initializeTarget(PassRegistry &); void initializeAAResultsWrapperPassPass(PassRegistry &); void initializeAlwaysInlinerLegacyPassPass(PassRegistry &); +void initializeAlwaysSpecializerPass(PassRegistry &); void initializeAssignmentTrackingAnalysisPass(PassRegistry &); void initializeAssumptionCacheTrackerPass(PassRegistry &); void initializeAtomicExpandLegacyPass(PassRegistry &); diff --git a/llvm/include/llvm/Transforms/IPO/AlwaysSpecializer.h b/llvm/include/llvm/Transforms/IPO/AlwaysSpecializer.h new file mode 100644 index 0000000000000..020d8eec3e760 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/AlwaysSpecializer.h @@ -0,0 +1,37 @@
+//=== AlwaysSpecializer.h - implementation of always_specialize -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_IPO_ALWAYSSPECIALIZER_H
+#define LLVM_TRANSFORMS_IPO_ALWAYSSPECIALIZER_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Module;
+class ModulePass;
+
+/// New pass manager pass for always_specialize: run() delegates to the
+/// transform and reports whether analyses were preserved.
+class AlwaysSpecializerPass : public PassInfoMixin<AlwaysSpecializerPass> {
+public:
+  AlwaysSpecializerPass();
+
+  /// Runs the transform on \p M. Returns PreservedAnalyses::none() if the
+  /// module changed and PreservedAnalyses::all() otherwise.
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+  /// Always true; the pass is scheduled in the O0 pipeline as well.
+  static bool isRequired() { return true; }
+};
+
+/// Creates the legacy pass manager (ModulePass) form of the transform.
+ModulePass *createAlwaysSpecializerPass();
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_ALWAYSSPECIALIZER_H
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index fde934fbb3cf1..5cb348e1a330e 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2054,6 +2054,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::Alignment; case bitc::ATTR_KIND_ALWAYS_INLINE: return Attribute::AlwaysInline; + case bitc::ATTR_KIND_ALWAYS_SPECIALIZE: + return Attribute::AlwaysSpecialize; case bitc::ATTR_KIND_BUILTIN: return Attribute::Builtin; case bitc::ATTR_KIND_BY_VAL: diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 628b939af19ce..f3afc91176723 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -750,6 +750,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_ALLOC_SIZE; case Attribute::AlwaysInline: return bitc::ATTR_KIND_ALWAYS_INLINE; + case Attribute::AlwaysSpecialize: + return bitc::ATTR_KIND_ALWAYS_SPECIALIZE; case Attribute::Builtin: return bitc::ATTR_KIND_BUILTIN; case Attribute::ByVal: diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 4603eaff8ade9..63ad02bcc522c 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++
b/llvm/lib/Passes/PassBuilder.cpp @@ -195,6 +195,7 @@ #include "llvm/Transforms/Coroutines/CoroSplit.h" #include "llvm/Transforms/HipStdPar/HipStdPar.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/IPO/AlwaysSpecializer.h" #include "llvm/Transforms/IPO/Annotation2Metadata.h" #include "llvm/Transforms/IPO/ArgumentPromotion.h" #include "llvm/Transforms/IPO/Attributor.h" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index a99146d5eaa34..a14ffddeb164b 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -42,6 +42,7 @@ #include "llvm/Transforms/Coroutines/CoroSplit.h" #include "llvm/Transforms/HipStdPar/HipStdPar.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/IPO/AlwaysSpecializer.h" #include "llvm/Transforms/IPO/Annotation2Metadata.h" #include "llvm/Transforms/IPO/ArgumentPromotion.h" #include "llvm/Transforms/IPO/Attributor.h" @@ -1277,6 +1278,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType)); MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true)); + MPM.addPass(AlwaysSpecializerPass()); if (EnableModuleInliner) MPM.addPass(buildModuleInlinerPipeline(Level, Phase)); @@ -2252,6 +2254,7 @@ PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, // code generation. 
MPM.addPass(AlwaysInlinerPass( /*InsertLifetimeIntrinsics=*/false)); + MPM.addPass(AlwaysSpecializerPass()); if (PTO.MergeFunctions) MPM.addPass(MergeFunctionsPass()); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index f761d0dab09a8..b65981652e258 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -50,6 +50,7 @@ MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA()) #define MODULE_PASS(NAME, CREATE_PASS) #endif MODULE_PASS("always-inline", AlwaysInlinerPass()) +MODULE_PASS("always-specialize", AlwaysSpecializerPass()) MODULE_PASS("annotation2metadata", Annotation2MetadataPass()) MODULE_PASS("assign-guid", AssignGUIDPass()) MODULE_PASS("attributor", AttributorPass()) diff --git a/llvm/lib/Transforms/IPO/AlwaysSpecializer.cpp b/llvm/lib/Transforms/IPO/AlwaysSpecializer.cpp new file mode 100644 index 0000000000000..9e0bbe883bd10 --- /dev/null +++ b/llvm/lib/Transforms/IPO/AlwaysSpecializer.cpp @@ -0,0 +1,324 @@ +//===- AlwaysSpecializer.cpp - implementation of always_specialize --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Function specialisation under programmer control. +// +// Specifically, function parameters are marked [[always_specialize]], then call +// sites which pass a constant argument are rewritten to call specialisations. +// +// The difficult parts of function specialisation are the cost model, ensuring +// termination and specialisation to the anticipated extent. +// +// Cost model is under programmer control, exactly like always_inline. +// +// Termination follows from the implementation following a phased structure: +// 1. Functions are identifed in the input IR +// 2. 
Calls that exist in the input IR are identified +// Those constitute the complete set of specialisations that will be created. +// +// This pass does the _minimum_ specialisation, in the sense that only call +// sites in the input will lead to cloning. A specialised function will call +// another specialised function iff there was a call site with the same +// argument vector in the input. +// +// Running the identifyCalls + createClones sequence N times will behave +// as expected, specialising recursively to that depth. This patch has N=1 +// in the first instance, with no commandline argument to override. +// Similarly variadic functions are not yet handled. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/AlwaysSpecializer.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/IPO/FunctionSpecialization.h" +#include "llvm/Transforms/Utils/Cloning.h" + +using namespace llvm; + +#define DEBUG_TYPE "always-specialize" + +namespace { + +class AlwaysSpecializer : public ModulePass { +public: + static char ID; + + AlwaysSpecializer() : ModulePass(ID) {} + StringRef getPassName() const override { return "Always specializer"; } + + // One constant for each argument, nullptr if that one is non-constant + using ArgVector = SmallVector; + + // A map from the ArgVector to the matching specialisation + using FunctionSpecializations = MapVector; + + // The four mini-passes populate and then use a map: + // 1. identifyFunctions writes all keys, with default initialised values. + // 2. identifyCalls writes all the ArgVector keys in the values of SpecList. + // 3. createClones writes the Function* values at the leaves. + // 4. replaceCalls walks the map doing the trivial rewrite. + + // Conceptually a Map but a vector suffices. 
+ using SpecListTy = + SmallVector, 4>; + + SpecListTy identifyFunctions(Module &M); + bool identifyCalls(Module &M, Function *F, FunctionSpecializations &); + bool createClones(Module &M, Function *F, FunctionSpecializations &); + bool replaceCalls(Module &M, Function *F, FunctionSpecializations &); + + bool runOnModule(Module &M) override { + bool Changed = false; + + // Sets all the keys in the structure used in this invocation. + SpecListTy SpecList = identifyFunctions(M); + size_t Count = SpecList.size(); + if (Count == 0) { + return false; + } + + // Record distinct call sites as vector -> nullptr + for (auto &[F, spec] : SpecList) + Changed |= identifyCalls(M, F, spec); + + // Create and record the clones. Note that call sites within the clones + // cannot trigger creating more clones so no termination risk. + for (auto &[F, spec] : SpecList) + Changed |= createClones(M, F, spec); + + // Replacing calls as the final phase means no need to track + // partially-specialised calls and no creating further clones. + for (auto &[F, spec] : SpecList) + Changed |= replaceCalls(M, F, spec); + + return Changed; + } + + static bool isCandidateFunction(const Function &F); + static bool callEligible(const Function &F, const CallBase *CB, + ArgVector &Out); + static Function *cloneCandidateFunction(Module &M, Function *F, + const ArgVector &C); + + // Only a member variable to reuse the allocation. Short lived. 
+ ArgVector ArgVec; +}; + +AlwaysSpecializer::SpecListTy AlwaysSpecializer::identifyFunctions(Module &M) { + SpecListTy SpecList; + for (Function &F : M) { + if (isCandidateFunction(F)) { + SpecList.push_back(std::make_pair(&F, FunctionSpecializations())); + } + } + return SpecList; +} + +bool AlwaysSpecializer::identifyCalls(Module &M, Function *F, + FunctionSpecializations &Specs) { + bool Found = false; + + for (User *U : F->users()) { + CallBase *CB = dyn_cast(U); + if (!CB || !callEligible(*F, CB, ArgVec)) { + continue; + } + + if (!Specs.contains(ArgVec)) { + Found = true; + Specs.insert(std::make_pair(ArgVec, nullptr)); + } + } + + return Found; +} + +bool AlwaysSpecializer::createClones(Module &M, Function *F, + FunctionSpecializations &Specs) { + bool Changed = false; + + for (auto It = Specs.begin(); It != Specs.end(); ++It) { + if (It->second) + continue; + Function *Clone = cloneCandidateFunction(M, F, It->first); + if (Clone) { + Changed = true; + It->second = Clone; + } + } + + return Changed; +} + +bool AlwaysSpecializer::replaceCalls(Module &M, Function *F, + FunctionSpecializations &Specs) { + bool Changed = false; + + for (User *u : make_early_inc_range(F->users())) { + CallBase *CB = dyn_cast(u); + if (!CB || !callEligible(*F, CB, ArgVec)) { + continue; + } + + Function *Clone = Specs[ArgVec]; + if (Clone) { + Changed = true; + CB->setCalledFunction(Clone); + } + } + + return Changed; +} + +bool AlwaysSpecializer::isCandidateFunction(const Function &F) { + + // Test if the function itself can't be specialised + if (!F.hasExactDefinition() || F.isIntrinsic() || + F.hasFnAttribute(Attribute::Naked)) + return false; + + // Variadics are left for a follow up patch + if (F.isVarArg()) + return false; + + // Need calls to the function for it to be worth considering + if (F.use_empty()) + return false; + + // Look for the attribute on a non-dead, non-indirect parameter + for (const Argument &Arg : F.args()) { + if (Arg.hasPointeeInMemoryValueAttr()) + 
continue; + + if (F.hasParamAttribute(Arg.getArgNo(), Attribute::AlwaysSpecialize)) + if (!Arg.use_empty()) + return true; + } + + return false; +} + +bool AlwaysSpecializer::callEligible(const Function &F, const CallBase *CB, + ArgVector &Out) { + const size_t Arity = F.arg_size(); + bool Eligible = false; + + if (CB->getCalledOperand() != &F) { + return false; + } + + if (CB->getFunctionType() != F.getFunctionType()) { + return false; + } + + if (CB->arg_size() != Arity) { + return false; + } + + Out.clear(); + for (size_t I = 0; I < Arity; I++) { + Constant *Arg = dyn_cast(CB->getArgOperand(I)); + if (Arg && F.hasParamAttribute(I, Attribute::AlwaysSpecialize)) { + Eligible = true; + Out.push_back(Arg); + } else { + Out.push_back(nullptr); + } + } + + return Eligible; +} + +Function *AlwaysSpecializer::cloneCandidateFunction(Module &M, Function *F, + const ArgVector &C) { + + Function *Clone = + Function::Create(F->getFunctionType(), F->getLinkage(), + F->getAddressSpace(), F->getName() + ".spec"); + + // Roughly CloneFunction but inserting specialisations next to the original. + ValueToValueMapTy VMap; + Function::arg_iterator DestI = Clone->arg_begin(); + for (const Argument &I : F->args()) { + DestI->setName(I.getName()); + VMap[&I] = &*DestI++; + } + SmallVector Returns; + CloneFunctionInto(Clone, F, VMap, CloneFunctionChangeType::LocalChangesOnly, + Returns); + + M.getFunctionList().insert(F->getIterator(), Clone); + + // Clones are local things. + Clone->setDSOLocal(true); + Clone->setVisibility(GlobalValue::DefaultVisibility); + Clone->setLinkage(GlobalValue::PrivateLinkage); + + // Replace uses of the argument with the constant. + for (size_t I = 0; I < C.size(); I++) { + if (!C[I]) + continue; + + // The argument is going to be dead, drop the specialise attr. 
+ Clone->removeParamAttr(I, Attribute::AlwaysSpecialize); + + Argument *V = Clone->getArg(I); + for (User *U : make_early_inc_range(V->users())) { + + if (auto *Inst = dyn_cast(U)) { + SimplifyQuery SQ = SimplifyQuery(Clone->getDataLayout(), Inst); + + // Do some simplification on the fly so that call sites in the cloned + // functions can potentially themselves resolve to specialisations + if (Value *NewInst = simplifyWithOpReplaced( + Inst, V, C[I], SQ, false /*AllowRefinement*/)) { + Inst->replaceAllUsesWith(NewInst); + continue; + } + + // If we're about to create a load from a constant, try to resolve it + // immediately so that the uses of the load are now also constant. + // This covers constant vtable containing pointer to constant vtable. + if (auto *Load = dyn_cast(Inst)) { + if (Load->getOperand(0) == V) { + if (Value *NewInst = simplifyLoadInst(Load, C[I], SQ)) { + Load->replaceAllUsesWith(NewInst); + continue; + } + } + } + } + } + + // Replace any remaining uses that the above failed to simplify. + V->replaceAllUsesWith(C[I]); + } + + return Clone; +} + +} // namespace + +char AlwaysSpecializer::ID = 0; + +INITIALIZE_PASS(AlwaysSpecializer, DEBUG_TYPE, "TODO", false, false) + +ModulePass *createAlwaysSpecializerPass() { return new AlwaysSpecializer(); } + +PreservedAnalyses AlwaysSpecializerPass::run(Module &M, + ModuleAnalysisManager &) { + return AlwaysSpecializer().runOnModule(M) ? 
PreservedAnalyses::none() + : PreservedAnalyses::all(); +} + +AlwaysSpecializerPass::AlwaysSpecializerPass() {} diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index 1c4ee0336d4db..f510d2c944092 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_component_library(LLVMipo AlwaysInliner.cpp + AlwaysSpecializer.cpp Annotation2Metadata.cpp ArgumentPromotion.cpp Attributor.cpp diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp index 43c5df3575003..b5cbdeac8033d 100644 --- a/llvm/lib/Transforms/IPO/SCCP.cpp +++ b/llvm/lib/Transforms/IPO/SCCP.cpp @@ -384,7 +384,6 @@ PreservedAnalyses IPSCCPPass::run(Module &M, ModuleAnalysisManager &AM) { return FAM.getResult(F); }; - if (!runIPSCCP(M, DL, &FAM, GetTLI, GetTTI, GetAC, GetDT, GetBFI, isFuncSpecEnabled())) return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 1210bdf4a1c98..5d1733f0dcfc6 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -990,6 +990,7 @@ Function *CodeExtractor::constructFunctionDeclaration( case Attribute::Alignment: case Attribute::AllocatedPointer: case Attribute::AllocAlign: + case Attribute::AlwaysSpecialize: case Attribute::ByVal: case Attribute::Captures: case Attribute::Dereferenceable: diff --git a/llvm/test/Other/new-pm-O0-defaults.ll b/llvm/test/Other/new-pm-O0-defaults.ll index 81d1ee0df2c5b..2db9aa2eb793c 100644 --- a/llvm/test/Other/new-pm-O0-defaults.ll +++ b/llvm/test/Other/new-pm-O0-defaults.ll @@ -34,10 +34,12 @@ ; CHECK-DIS-NEXT: Running pass: AddDiscriminatorsPass ; CHECK-DIS-NEXT: Running pass: AlwaysInlinerPass ; CHECK-DIS-NEXT: Running analysis: ProfileSummaryAnalysis +; CHECK-DIS-NEXT: Running pass: AlwaysSpecializerPass ; CHECK-DEFAULT: Running analysis: InnerAnalysisManagerProxy ; 
CHECK-DEFAULT-NEXT: Running pass: EntryExitInstrumenterPass ; CHECK-DEFAULT-NEXT: Running pass: AlwaysInlinerPass ; CHECK-DEFAULT-NEXT: Running analysis: ProfileSummaryAnalysis +; CHECK-DEFAULT-NEXT: Running pass: AlwaysSpecializerPass ; CHECK-MATRIX: Running pass: LowerMatrixIntrinsicsPass ; CHECK-MATRIX-NEXT: Running analysis: TargetIRAnalysis ; CHECK-CORO-NEXT: Running pass: CoroConditionalWrapper diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index c554fdbf4c799..21b1fb291784c 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -133,6 +133,7 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: AlwaysSpecializerPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll index 62bb02d9b3c40..9baf119000d3e 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll @@ -64,6 +64,7 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass ; CHECK-PRELINK-O-NEXT: Running analysis: ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: AlwaysSpecializerPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll index 0da7a9f73bdce..1b5aaa11108ce 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -52,6 
+52,7 @@ ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass +; CHECK-O-NEXT: Running pass: AlwaysSpecializerPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index 38b7890682783..ccbe82f27987f 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -62,6 +62,7 @@ ; CHECK-O-NEXT: Running pass: PGOForceFunctionAttrsPass ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass +; CHECK-O-NEXT: Running pass: AlwaysSpecializerPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA diff --git a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll index 5aacd26def2be..2de56b91f0b21 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll @@ -96,6 +96,7 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: AlwaysSpecializerPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll index f6a9406596803..462c273788740 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ 
b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -87,6 +87,7 @@ ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis on foo ; CHECK-O-NEXT: Running pass: PGOForceFunctionAttrsPass ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass +; CHECK-O-NEXT: Running pass: AlwaysSpecializerPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll index 48a9433d24999..01a7364ea6073 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -67,6 +67,7 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo ; CHECK-O-NEXT: Running pass: PGOForceFunctionAttrsPass ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass +; CHECK-O-NEXT: Running pass: AlwaysSpecializerPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA diff --git a/llvm/test/Transforms/FunctionSpecialization/always-specialize-diamond.ll b/llvm/test/Transforms/FunctionSpecialization/always-specialize-diamond.ll new file mode 100644 index 0000000000000..248593e22f811 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/always-specialize-diamond.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature --include-generated-funcs +; RUN: opt -S --passes=always-specialize < %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @leaf(i32 alwaysspecialize %x, i32 alwaysspecialize %y) { +entry: + %add = add nsw i32 %x, %y + ret i32 %add +} 
+ +define i32 @in_order(i32 alwaysspecialize %x) { +entry: + %call = call i32 @leaf(i32 42, i32 %x) + ret i32 %call +} + +define i32 @swapped(i32 alwaysspecialize %x) { +entry: + %call = call i32 @leaf(i32 %x, i32 81) + ret i32 %call +} + +define i32 @root() { +entry: + %call = call i32 @in_order(i32 81) + %call1 = call i32 @swapped(i32 42) + %mul = mul nsw i32 %call, %call1 + ret i32 %mul +} + + + +; CHECK-LABEL: define {{[^@]+}}@leaf.spec +; CHECK-SAME: (i32 alwaysspecialize [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X]], 81 +; CHECK-NEXT: ret i32 [[ADD]] +; +; +; CHECK-LABEL: define {{[^@]+}}@leaf.spec.1 +; CHECK-SAME: (i32 [[X:%.*]], i32 alwaysspecialize [[Y:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 42, [[Y]] +; CHECK-NEXT: ret i32 [[ADD]] +; +; +; CHECK-LABEL: define {{[^@]+}}@leaf +; CHECK-SAME: (i32 alwaysspecialize [[X:%.*]], i32 alwaysspecialize [[Y:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X]], [[Y]] +; CHECK-NEXT: ret i32 [[ADD]] +; +; +; CHECK-LABEL: define {{[^@]+}}@in_order.spec +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @leaf(i32 42, i32 81) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@in_order +; CHECK-SAME: (i32 alwaysspecialize [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @leaf.spec.1(i32 42, i32 [[X]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@swapped.spec +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @leaf(i32 42, i32 81) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@swapped +; CHECK-SAME: (i32 alwaysspecialize [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @leaf.spec(i32 [[X]], i32 81) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@root() { +; CHECK-NEXT: entry: +; CHECK-NEXT: 
[[CALL:%.*]] = call i32 @in_order.spec(i32 81) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @swapped.spec(i32 42) +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CALL]], [[CALL1]] +; CHECK-NEXT: ret i32 [[MUL]] +; diff --git a/llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive-factorial.ll b/llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive-factorial.ll new file mode 100644 index 0000000000000..b42427a9edfce --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive-factorial.ll @@ -0,0 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature --include-generated-funcs +; RUN: opt -S --passes=always-specialize < %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @factorial(i32 alwaysspecialize %x) { +entry: + %cmp = icmp ult i32 %x, 2 + br i1 %cmp, label %if.then, label %if.end + +if.then: + br label %return + +if.end: + %sub = sub i32 %x, 1 + %call = call i32 @factorial(i32 %sub) + %mul = mul i32 %x, %call + br label %return + +return: + %retval.0 = phi i32 [ 1, %if.then ], [ %mul, %if.end ] + ret i32 %retval.0 +} + +define i32 @factorial_driver() { +entry: + %call = call i32 @factorial(i32 0) + %call1 = call i32 @factorial(i32 1) + %add = add i32 %call, %call1 + %call2 = call i32 @factorial(i32 2) + %add3 = add i32 %add, %call2 + %call4 = call i32 @factorial(i32 3) + %add5 = add i32 %add3, %call4 + %call6 = call i32 @factorial(i32 4) + %add7 = add i32 %add5, %call6 + ret i32 %add7 +} + + + +; CHECK-LABEL: define {{[^@]+}}@factorial.spec +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 4, 2 +; CHECK-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[RETURN:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[SUB:%.*]] = 
sub i32 4, 1 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @factorial.spec.1(i32 3) +; CHECK-NEXT: [[MUL:%.*]] = mul i32 4, [[CALL]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 1, [[IF_THEN]] ], [ [[MUL]], [[IF_END]] ] +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +; +; CHECK-LABEL: define {{[^@]+}}@factorial.spec.1 +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 3, 2 +; CHECK-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[RETURN:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 3, 1 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @factorial.spec.2(i32 2) +; CHECK-NEXT: [[MUL:%.*]] = mul i32 3, [[CALL]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 1, [[IF_THEN]] ], [ [[MUL]], [[IF_END]] ] +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +; +; CHECK-LABEL: define {{[^@]+}}@factorial.spec.2 +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 2, 2 +; CHECK-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[RETURN:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 2, 1 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @factorial.spec.3(i32 1) +; CHECK-NEXT: [[MUL:%.*]] = mul i32 2, [[CALL]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 1, [[IF_THEN]] ], [ [[MUL]], [[IF_END]] ] +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +; +; CHECK-LABEL: define {{[^@]+}}@factorial.spec.3 +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 1, 2 +; CHECK-NEXT: br i1 true, label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[RETURN:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 1, 1 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @factorial.spec.4(i32 0) +; CHECK-NEXT: 
[[MUL:%.*]] = mul i32 1, [[CALL]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 1, [[IF_THEN]] ], [ [[CALL]], [[IF_END]] ] +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +; +; CHECK-LABEL: define {{[^@]+}}@factorial.spec.4 +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 0, 2 +; CHECK-NEXT: br i1 true, label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[RETURN:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, 1 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @factorial(i32 -1) +; CHECK-NEXT: [[MUL:%.*]] = mul i32 0, [[CALL]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 1, [[IF_THEN]] ], [ [[MUL]], [[IF_END]] ] +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +; +; CHECK-LABEL: define {{[^@]+}}@factorial +; CHECK-SAME: (i32 alwaysspecialize [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X]], 2 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[RETURN:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[X]], 1 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @factorial(i32 [[SUB]]) +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[X]], [[CALL]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 1, [[IF_THEN]] ], [ [[MUL]], [[IF_END]] ] +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +; +; CHECK-LABEL: define {{[^@]+}}@factorial_driver() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @factorial.spec.4(i32 0) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @factorial.spec.3(i32 1) +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[CALL]], [[CALL1]] +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @factorial.spec.2(i32 2) +; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD]], [[CALL2]] +; CHECK-NEXT: [[CALL4:%.*]] = call i32 @factorial.spec.1(i32 3) +; CHECK-NEXT: [[ADD5:%.*]] = add i32 
[[ADD3]], [[CALL4]] +; CHECK-NEXT: [[CALL6:%.*]] = call i32 @factorial.spec(i32 4) +; CHECK-NEXT: [[ADD7:%.*]] = add i32 [[ADD5]], [[CALL6]] +; CHECK-NEXT: ret i32 [[ADD7]] +; diff --git a/llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive-indirect.ll b/llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive-indirect.ll new file mode 100644 index 0000000000000..ac82a3cbc4dc7 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive-indirect.ll @@ -0,0 +1,294 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature --include-generated-funcs +; RUN: opt -S --passes=always-specialize < %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define zeroext i1 @odd(i32 alwaysspecialize %x) { +entry: + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: + br label %cond.end + +cond.false: + %sub = sub i32 %x, 1 + %call = call zeroext i1 @even(i32 %sub) + %conv = zext i1 %call to i32 + br label %cond.end + +cond.end: + %cond = phi i32 [ 0, %cond.true ], [ %conv, %cond.false ] + %tobool = icmp ne i32 %cond, 0 + ret i1 %tobool +} + +define zeroext i1 @even(i32 alwaysspecialize %x) { +entry: + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: + br label %cond.end + +cond.false: + %sub = sub i32 %x, 1 + %call = call zeroext i1 @odd(i32 %sub) + %conv = zext i1 %call to i32 + br label %cond.end + +cond.end: + %cond = phi i32 [ 1, %cond.true ], [ %conv, %cond.false ] + %tobool = icmp ne i32 %cond, 0 + ret i1 %tobool +} + +define zeroext i1 @evenodd_driver() { +entry: + %call = call zeroext i1 @even(i32 0) + br i1 %call, label %land.lhs.true, label %land.end + +land.lhs.true: + %call1 = call zeroext i1 @even(i32 1) + br i1 %call1, label %land.end, label %land.lhs.true2 + 
+land.lhs.true2: + %call3 = call zeroext i1 @even(i32 2) + br i1 %call3, label %land.lhs.true4, label %land.end + +land.lhs.true4: + %call5 = call zeroext i1 @even(i32 3) + br i1 %call5, label %land.end, label %land.lhs.true6 + +land.lhs.true6: + %call7 = call zeroext i1 @odd(i32 0) + br i1 %call7, label %land.end, label %land.lhs.true8 + +land.lhs.true8: + %call9 = call zeroext i1 @odd(i32 1) + br i1 %call9, label %land.lhs.true10, label %land.end + +land.lhs.true10: + %call11 = call zeroext i1 @odd(i32 2) + br i1 %call11, label %land.end, label %land.rhs + +land.rhs: + %call12 = call zeroext i1 @odd(i32 3) + br label %land.end + +land.end: + %0 = phi i1 [ false, %land.lhs.true10 ], [ false, %land.lhs.true8 ], [ false, %land.lhs.true6 ], [ false, %land.lhs.true4 ], [ false, %land.lhs.true2 ], [ false, %land.lhs.true ], [ false, %entry ], [ %call12, %land.rhs ] + ret i1 %0 +} + + + +; CHECK-LABEL: define {{[^@]+}}@odd.spec +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 3, 0 +; CHECK-NEXT: br i1 false, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 3, 1 +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @even.spec.4(i32 2) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[COND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@odd.spec.1 +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 2, 0 +; CHECK-NEXT: br i1 false, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 2, 1 +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 
@even.spec.5(i32 1) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[COND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@odd.spec.2 +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 1, 0 +; CHECK-NEXT: br i1 false, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 1, 1 +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @even.spec.6(i32 0) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[COND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@odd.spec.3 +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 0, 0 +; CHECK-NEXT: br i1 true, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, 1 +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @even(i32 -1) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[COND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@odd +; CHECK-SAME: (i32 alwaysspecialize [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; 
CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[X]], 1 +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @even(i32 [[SUB]]) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[COND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@even.spec +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 3, 0 +; CHECK-NEXT: br i1 false, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 3, 1 +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @odd.spec.1(i32 2) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[COND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@even.spec.4 +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 2, 0 +; CHECK-NEXT: br i1 false, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 2, 1 +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @odd.spec.2(i32 1) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[COND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@even.spec.5 +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: 
[[CMP:%.*]] = icmp eq i32 1, 0 +; CHECK-NEXT: br i1 false, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 1, 1 +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @odd.spec.3(i32 0) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[COND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@even.spec.6 +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 0, 0 +; CHECK-NEXT: br i1 true, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, 1 +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @odd(i32 -1) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[COND]], 0 +; CHECK-NEXT: ret i1 [[TOBOOL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@even +; CHECK-SAME: (i32 alwaysspecialize [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: br label [[COND_END:%.*]] +; CHECK: cond.false: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[X]], 1 +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @odd(i32 [[SUB]]) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CALL]] to i32 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[COND]], 0 +; 
CHECK-NEXT: ret i1 [[TOBOOL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@evenodd_driver() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @even.spec.6(i32 0) +; CHECK-NEXT: br i1 [[CALL]], label [[LAND_LHS_TRUE:%.*]], label [[LAND_END:%.*]] +; CHECK: land.lhs.true: +; CHECK-NEXT: [[CALL1:%.*]] = call zeroext i1 @even.spec.5(i32 1) +; CHECK-NEXT: br i1 [[CALL1]], label [[LAND_END]], label [[LAND_LHS_TRUE2:%.*]] +; CHECK: land.lhs.true2: +; CHECK-NEXT: [[CALL3:%.*]] = call zeroext i1 @even.spec.4(i32 2) +; CHECK-NEXT: br i1 [[CALL3]], label [[LAND_LHS_TRUE4:%.*]], label [[LAND_END]] +; CHECK: land.lhs.true4: +; CHECK-NEXT: [[CALL5:%.*]] = call zeroext i1 @even.spec(i32 3) +; CHECK-NEXT: br i1 [[CALL5]], label [[LAND_END]], label [[LAND_LHS_TRUE6:%.*]] +; CHECK: land.lhs.true6: +; CHECK-NEXT: [[CALL7:%.*]] = call zeroext i1 @odd.spec.3(i32 0) +; CHECK-NEXT: br i1 [[CALL7]], label [[LAND_END]], label [[LAND_LHS_TRUE8:%.*]] +; CHECK: land.lhs.true8: +; CHECK-NEXT: [[CALL9:%.*]] = call zeroext i1 @odd.spec.2(i32 1) +; CHECK-NEXT: br i1 [[CALL9]], label [[LAND_LHS_TRUE10:%.*]], label [[LAND_END]] +; CHECK: land.lhs.true10: +; CHECK-NEXT: [[CALL11:%.*]] = call zeroext i1 @odd.spec.1(i32 2) +; CHECK-NEXT: br i1 [[CALL11]], label [[LAND_END]], label [[LAND_RHS:%.*]] +; CHECK: land.rhs: +; CHECK-NEXT: [[CALL12:%.*]] = call zeroext i1 @odd.spec(i32 3) +; CHECK-NEXT: br label [[LAND_END]] +; CHECK: land.end: +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[LAND_LHS_TRUE10]] ], [ false, [[LAND_LHS_TRUE8]] ], [ false, [[LAND_LHS_TRUE6]] ], [ false, [[LAND_LHS_TRUE4]] ], [ false, [[LAND_LHS_TRUE2]] ], [ false, [[LAND_LHS_TRUE]] ], [ false, [[ENTRY:%.*]] ], [ [[CALL12]], [[LAND_RHS]] ] +; CHECK-NEXT: ret i1 [[TMP0]] +; diff --git a/llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive.ll b/llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive.ll new file mode 100644 index 0000000000000..b4216ebfb212a --- /dev/null +++ 
b/llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature --include-generated-funcs +; RUN: opt -S --passes=always-specialize < %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @loop(i32 alwaysspecialize %x) { +entry: + %call = call i32 @loop(i32 5) + %call1 = call i32 @loop(i32 %x) + %add = add i32 %call, %call1 + ret i32 %add +} + +define i32 @loop_driver() { +entry: + %call = call i32 @loop(i32 5) + ret i32 %call +} + + + +; CHECK-LABEL: define {{[^@]+}}@loop.spec +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @loop.spec(i32 5) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @loop.spec(i32 5) +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[CALL]], [[CALL1]] +; CHECK-NEXT: ret i32 [[ADD]] +; +; +; CHECK-LABEL: define {{[^@]+}}@loop +; CHECK-SAME: (i32 alwaysspecialize [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @loop.spec(i32 5) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @loop(i32 [[X]]) +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[CALL]], [[CALL1]] +; CHECK-NEXT: ret i32 [[ADD]] +; +; +; CHECK-LABEL: define {{[^@]+}}@loop_driver() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @loop.spec(i32 5) +; CHECK-NEXT: ret i32 [[CALL]] +; diff --git a/llvm/test/Transforms/FunctionSpecialization/always-specialize-simple.ll b/llvm/test/Transforms/FunctionSpecialization/always-specialize-simple.ll new file mode 100644 index 0000000000000..a173e93c7cf37 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/always-specialize-simple.ll @@ -0,0 +1,229 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature --include-generated-funcs +; RUN: opt -S --passes=always-specialize < %s 
| FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@ptrfirst.x = internal global i32 42, align 4 +@ptrboth.x = internal global i32 42, align 4 +@ptrboth.y = internal constant i32 81, align 4 +@ptrallsame.x = internal global i32 42, align 4 + +define i32 @callee(i32 alwaysspecialize %x, i32 %y, i32 alwaysspecialize %z) { +entry: + %add = add nsw i32 %x, %z + %mul = mul nsw i32 %add, %z + ret i32 %mul +} + +define i32 @first(i32 %a, i32 %b) { +entry: + %call = call i32 @callee(i32 42, i32 %a, i32 %b) + ret i32 %call +} + +define i32 @second(i32 %a, i32 %b) { +entry: + %call = call i32 @callee(i32 %a, i32 42, i32 %b) + ret i32 %call +} + +define i32 @third(i32 %a, i32 %b) { +entry: + %call = call i32 @callee(i32 %a, i32 %b, i32 42) + ret i32 %call +} + +define i32 @both(i32 %a) { +entry: + %call = call i32 @callee(i32 21, i32 %a, i32 42) + ret i32 %call +} + +define i32 @ptrcallee(ptr alwaysspecialize %x, ptr %y, ptr alwaysspecialize %z) { +entry: + %0 = load i32, ptr %x, align 4 + %1 = load i32, ptr %z, align 4 + %add = add nsw i32 %0, %1 + %2 = load i32, ptr %z, align 4 + %mul = mul nsw i32 %add, %2 + ret i32 %mul +} + +define i32 @ptrfirst(ptr %a, ptr %b) { +entry: + %call = call i32 @ptrcallee(ptr @ptrfirst.x, ptr %a, ptr %b) + ret i32 %call +} + +define i32 @ptrboth(ptr %a) { +entry: + %call = call i32 @ptrcallee(ptr @ptrboth.x, ptr %a, ptr @ptrboth.y) + ret i32 %call +} + +define i32 @ptrallsame() { +entry: + %call = call i32 @ptrcallee(ptr @ptrallsame.x, ptr @ptrallsame.x, ptr @ptrallsame.x) + ret i32 %call +} + +define i32 @virtualcall(ptr alwaysspecialize %func, i32 %x) { +entry: + %call = call i32 %func(i32 %x) + ret i32 %call +} + +define i32 @devirtualisecaller(i32 %x) { +entry: + %call = call i32 @virtualcall(ptr @virtualcallee, i32 %x) + ret i32 %call +} + +declare i32 @virtualcallee(i32 noundef) + + + +; CHECK-LABEL: define 
{{[^@]+}}@callee.spec +; CHECK-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 21, 42 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], 42 +; CHECK-NEXT: ret i32 [[MUL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@callee.spec.1 +; CHECK-SAME: (i32 alwaysspecialize [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X]], 42 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], 42 +; CHECK-NEXT: ret i32 [[MUL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@callee.spec.2 +; CHECK-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]], i32 alwaysspecialize [[Z:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 42, [[Z]] +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[Z]] +; CHECK-NEXT: ret i32 [[MUL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@callee +; CHECK-SAME: (i32 alwaysspecialize [[X:%.*]], i32 [[Y:%.*]], i32 alwaysspecialize [[Z:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X]], [[Z]] +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[Z]] +; CHECK-NEXT: ret i32 [[MUL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@first +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @callee.spec.2(i32 42, i32 [[A]], i32 [[B]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@second +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @callee(i32 [[A]], i32 42, i32 [[B]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@third +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @callee.spec.1(i32 [[A]], i32 [[B]], i32 42) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@both +; CHECK-SAME: (i32 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @callee.spec(i32 21, i32 [[A]], i32 42) +; 
CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@ptrcallee.spec +; CHECK-SAME: (ptr [[X:%.*]], ptr [[Y:%.*]], ptr [[Z:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @ptrallsame.x, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @ptrallsame.x, align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @ptrallsame.x, align 4 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[TMP2]] +; CHECK-NEXT: ret i32 [[MUL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@ptrcallee.spec.3 +; CHECK-SAME: (ptr [[X:%.*]], ptr [[Y:%.*]], ptr [[Z:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @ptrboth.x, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @ptrboth.y, align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 81 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @ptrboth.y, align 4 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], 81 +; CHECK-NEXT: ret i32 [[MUL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@ptrcallee.spec.4 +; CHECK-SAME: (ptr [[X:%.*]], ptr [[Y:%.*]], ptr alwaysspecialize [[Z:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @ptrfirst.x, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[Z]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Z]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[TMP2]] +; CHECK-NEXT: ret i32 [[MUL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@ptrcallee +; CHECK-SAME: (ptr alwaysspecialize [[X:%.*]], ptr [[Y:%.*]], ptr alwaysspecialize [[Z:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[Z]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[Z]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[TMP2]] +; CHECK-NEXT: ret i32 [[MUL]] +; +; +; CHECK-LABEL: 
define {{[^@]+}}@ptrfirst +; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @ptrcallee.spec.4(ptr @ptrfirst.x, ptr [[A]], ptr [[B]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@ptrboth +; CHECK-SAME: (ptr [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @ptrcallee.spec.3(ptr @ptrboth.x, ptr [[A]], ptr @ptrboth.y) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@ptrallsame() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @ptrcallee.spec(ptr @ptrallsame.x, ptr @ptrallsame.x, ptr @ptrallsame.x) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@virtualcall.spec +; CHECK-SAME: (ptr [[FUNC:%.*]], i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @virtualcallee(i32 [[X]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@virtualcall +; CHECK-SAME: (ptr alwaysspecialize [[FUNC:%.*]], i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 [[FUNC]](i32 [[X]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@devirtualisecaller +; CHECK-SAME: (i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @virtualcall.spec(ptr @virtualcallee, i32 [[X]]) +; CHECK-NEXT: ret i32 [[CALL]] +; diff --git a/llvm/test/Transforms/FunctionSpecialization/always-specialize-variadic.ll b/llvm/test/Transforms/FunctionSpecialization/always-specialize-variadic.ll new file mode 100644 index 0000000000000..b0ae24ed97b7a --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/always-specialize-variadic.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature --include-generated-funcs +; RUN: opt -S --passes=always-specialize < %s | FileCheck %s +target datalayout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.__va_list_tag = type { i32, i32, ptr, ptr } + +@.str = private unnamed_addr constant [10 x i8] c"%s -> %d\0A\00", align 1 +@.str.1 = private unnamed_addr constant [6 x i8] c"thing\00", align 1 + +define i32 @print(ptr %F, ptr alwaysspecialize %fmt, ...) { +entry: + %va = alloca [1 x %struct.__va_list_tag], align 16 + %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag], ptr %va, i64 0, i64 0 + call void @llvm.va_start.p0(ptr %arraydecay) + %arraydecay1 = getelementptr inbounds [1 x %struct.__va_list_tag], ptr %va, i64 0, i64 0 + %call = call i32 @vprint(ptr %F, ptr %fmt, ptr %arraydecay1) + %arraydecay2 = getelementptr inbounds [1 x %struct.__va_list_tag], ptr %va, i64 0, i64 0 + call void @llvm.va_end.p0(ptr %arraydecay2) + ret i32 %call +} + +declare void @llvm.va_start.p0(ptr) + +declare i32 @vprint(ptr noundef, ptr noundef, ptr noundef) + +declare void @llvm.va_end.p0(ptr) + +define i32 @caller(ptr %F, i32 %x) { +entry: + %call = call i32 (ptr, ptr, ...) @print(ptr %F, ptr @.str, ptr @.str.1, i32 %x) + ret i32 %call +} + + + +; CHECK-LABEL: define {{[^@]+}}@print +; CHECK-SAME: (ptr [[F:%.*]], ptr alwaysspecialize [[FMT:%.*]], ...) 
{ +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VA:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +; CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[VA]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]]) +; CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[VA]], i64 0, i64 0 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @vprint(ptr [[F]], ptr [[FMT]], ptr [[ARRAYDECAY1]]) +; CHECK-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[VA]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.va_end.p0(ptr [[ARRAYDECAY2]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define {{[^@]+}}@caller +; CHECK-SAME: (ptr [[F:%.*]], i32 [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr, ptr, ...) @print(ptr [[F]], ptr @.str, ptr @.str.1, i32 [[X]]) +; CHECK-NEXT: ret i32 [[CALL]] +; diff --git a/llvm/tools/opt/optdriver.cpp b/llvm/tools/opt/optdriver.cpp index de46efa13025d..65c3ee5467604 100644 --- a/llvm/tools/opt/optdriver.cpp +++ b/llvm/tools/opt/optdriver.cpp @@ -421,6 +421,7 @@ extern "C" int optMain( initializeTarget(Registry); // For codegen passes, only passes that do IR to IR transformation are // supported. + initializeAlwaysSpecializerPass(Registry); initializeExpandLargeDivRemLegacyPassPass(Registry); initializeExpandFpLegacyPassPass(Registry); initializeExpandMemCmpLegacyPassPass(Registry);