From c72eb649d157bdf0bf81b201f1a664b304773f19 Mon Sep 17 00:00:00 2001 From: Maksim Panchenko Date: Mon, 29 Apr 2024 11:18:33 -0700 Subject: [PATCH] [BOLT] Add split function support for the Linux kernel While rewriting the Linux kernel, we try to fit optimized functions into their original boundaries. When a function becomes larger, we skip it during the rewrite and end up with less than optimal code layout. To overcome that issue, add support for the --split-functions option so that the hot part of the function can fit into the original space. The cold part goes to the reserved space in the binary. --- bolt/lib/Passes/SplitFunctions.cpp | 13 +++++++ bolt/lib/Rewrite/LinuxKernelRewriter.cpp | 44 +++++++++++++++++++----- 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp index f9e634d15a972..bd0b6dea0e065 100644 --- a/bolt/lib/Passes/SplitFunctions.cpp +++ b/bolt/lib/Passes/SplitFunctions.cpp @@ -715,6 +715,12 @@ Error SplitFunctions::runOnFunctions(BinaryContext &BC) { if (!opts::SplitFunctions) return Error::success(); + if (BC.IsLinuxKernel && BC.BOLTReserved.empty()) { + BC.errs() << "BOLT-ERROR: split functions require reserved space in the " + "Linux kernel binary\n"; + exit(1); + } + // If split strategy is not CDSplit, then a second run of the pass is not // needed after function reordering. if (BC.HasFinalizedFunctionOrder && @@ -829,6 +835,13 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) { } } } + + // Outlining blocks with dynamic branches is not supported yet. 
+ if (BC.IsLinuxKernel) { + if (llvm::any_of( + *BB, [&](MCInst &Inst) { return BC.MIB->isDynamicBranch(Inst); })) + BB->setCanOutline(false); + } } BF.getLayout().updateLayoutIndices(); diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp index 17077b4fa2487..b976699cef178 100644 --- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp +++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp @@ -783,11 +783,9 @@ Error LinuxKernelRewriter::rewriteORCTables() { }; // Emit new ORC entries for the emitted function. - auto emitORC = [&](const BinaryFunction &BF) -> Error { - assert(!BF.isSplit() && "Split functions not supported by ORC writer yet."); - + auto emitORC = [&](const FunctionFragment &FF) -> Error { ORCState CurrentState = NullORC; - for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { + for (BinaryBasicBlock *BB : FF) { for (MCInst &Inst : *BB) { ErrorOr ErrorOrState = BC.MIB->tryGetAnnotationAs(Inst, "ORC"); @@ -808,7 +806,36 @@ Error LinuxKernelRewriter::rewriteORCTables() { return Error::success(); }; + // Emit ORC entries for cold fragments. We assume that these fragments are + // emitted contiguously in memory using reserved space in the kernel. This + // assumption is validated in post-emit pass validateORCTables() where we + // check that ORC entries are sorted by their addresses. + auto emitColdORC = [&]() -> Error { + for (BinaryFunction &BF : + llvm::make_second_range(BC.getBinaryFunctions())) { + if (!BC.shouldEmit(BF)) + continue; + for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) + if (Error E = emitORC(FF)) + return E; + } + + return Error::success(); + }; + + bool ShouldEmitCold = !BC.BOLTReserved.empty(); for (ORCListEntry &Entry : ORCEntries) { + if (ShouldEmitCold && Entry.IP > BC.BOLTReserved.start()) { + if (Error E = emitColdORC()) + return E; + + // Emit terminator entry at the end of the reserved region. 
+ if (Error E = emitORCEntry(BC.BOLTReserved.end(), NullORC)) + return E; + + ShouldEmitCold = false; + } + // Emit original entries for functions that we haven't modified. if (!Entry.BF || !BC.shouldEmit(*Entry.BF)) { // Emit terminator only if it marks the start of a function. @@ -822,7 +849,7 @@ Error LinuxKernelRewriter::rewriteORCTables() { // Emit all ORC entries for a function referenced by an entry and skip over // the rest of entries for this function by resetting its ORC attribute. if (Entry.BF->hasORC()) { - if (Error E = emitORC(*Entry.BF)) + if (Error E = emitORC(Entry.BF->getLayout().getMainFragment())) return E; Entry.BF->setHasORC(false); } @@ -831,10 +858,9 @@ Error LinuxKernelRewriter::rewriteORCTables() { LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted " << NumEmitted << " ORC entries\n"); - // Replicate terminator entry at the end of sections to match the original - // table sizes. - const BinaryFunction &LastBF = BC.getBinaryFunctions().rbegin()->second; - const uint64_t LastIP = LastBF.getAddress() + LastBF.getMaxSize(); + // Populate ORC tables with a terminator entry with max address to match the + // original table sizes. + const uint64_t LastIP = std::numeric_limits::max(); while (UnwindWriter.bytesRemaining()) { if (Error E = emitORCEntry(LastIP, NullORC, nullptr, /*Force*/ true)) return E;