From 5d207e9f4738b2c49e171a4d9270a294de44ecb4 Mon Sep 17 00:00:00 2001 From: mingmingl Date: Wed, 8 Jan 2025 14:28:43 -0800 Subject: [PATCH 1/9] [SDP]Introduce StaticDataSplitter pass and implemenet jump table splitting --- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 6 + llvm/include/llvm/CodeGen/MachineFunction.h | 9 + .../llvm/CodeGen/MachineJumpTableInfo.h | 8 +- llvm/include/llvm/CodeGen/Passes.h | 4 + llvm/include/llvm/InitializePasses.h | 1 + .../llvm/Passes/MachinePassRegistry.def | 1 + llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/CodeGen.cpp | 1 + llvm/lib/CodeGen/MachineBasicBlock.cpp | 4 + llvm/lib/CodeGen/MachineFunction.cpp | 13 ++ llvm/lib/CodeGen/StaticDataSplitter.cpp | 153 ++++++++++++++++ llvm/lib/CodeGen/TargetPassConfig.cpp | 1 + llvm/test/CodeGen/X86/jump-table-partition.ll | 163 ++++++++++++++++++ 13 files changed, 363 insertions(+), 2 deletions(-) create mode 100644 llvm/lib/CodeGen/StaticDataSplitter.cpp create mode 100644 llvm/test/CodeGen/X86/jump-table-partition.ll diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 7fe33c3913f2d..9ed356c2ab233 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -997,6 +997,12 @@ class MachineBasicBlock /// no changes occurred in the meantime. bool canSplitCriticalEdge(const MachineBasicBlock *Succ) const; + /// Return an index for MachineJumpTableInfo if \p this basic block ends with + /// an indirect jump using a jump table, otherwise -1. + /// This function is a thin wrapper and forward calls to the per-target method + /// `TargetInstrInfo::getjumpTableIndex`. + int getJumpTableIndex() const; + void pop_front() { Insts.pop_front(); } void pop_back() { Insts.pop_back(); } void push_back(MachineInstr *MI) { Insts.push_back(MI); } diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index d696add8a1af5..c0f983d1c6787 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -88,6 +88,15 @@ template <> struct ilist_callback_traits { } }; +// The hotness of static data tracked by a MachineFunction and not represented +// as a global object in the module IR / MIR. Typical examples are +// MachineJumpTableInfo and MachineConstantPool. +enum class DataHotness { + Unknown, + Cold, + Hot, +}; + /// MachineFunctionInfo - This class can be derived from and used by targets to /// hold private target-specific information for each MachineFunction. Objects /// of type are accessed/created with MF::getInfo and destroyed when the diff --git a/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h b/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h index e8e9c2f6338e0..cc1f54a81b9bb 100644 --- a/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h +++ b/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h @@ -28,6 +28,7 @@ namespace llvm { class MachineBasicBlock; class DataLayout; class raw_ostream; +enum class DataHotness; /// MachineJumpTableEntry - One jump table in the jump table info. /// @@ -35,8 +36,9 @@ struct MachineJumpTableEntry { /// MBBs - The vector of basic blocks from which to create the jump table. std::vector MBBs; - explicit MachineJumpTableEntry(const std::vector &M) - : MBBs(M) {} + DataHotness Hotness; + + explicit MachineJumpTableEntry(const std::vector &M); }; class MachineJumpTableInfo { @@ -107,6 +109,8 @@ class MachineJumpTableInfo { return JumpTables; } + void updateJumpTableHotness(size_t JTI, DataHotness Hotness); + /// RemoveJumpTable - Mark the specific index as being dead. This will /// prevent it from being emitted. void RemoveJumpTable(unsigned Idx) { diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index d1fac4a304cff..16423d03ff701 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -71,6 +71,10 @@ namespace llvm { /// using profile information. MachineFunctionPass *createMachineFunctionSplitterPass(); + /// createStaticDataSplitterPass - This pass partions static data sections + /// into a hot and cold section using profile information. + MachineFunctionPass *createStaticDataSplitterPass(); + /// MachineFunctionPrinter pass - This pass prints out the machine function to /// the given stream as a debugging tool. MachineFunctionPass * diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 1cb9013bc48cc..8111afcc1fb20 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -293,6 +293,7 @@ void initializeSpeculativeExecutionLegacyPassPass(PassRegistry &); void initializeSpillPlacementWrapperLegacyPass(PassRegistry &); void initializeStackColoringLegacyPass(PassRegistry &); void initializeStackFrameLayoutAnalysisPassPass(PassRegistry &); +void initializeStaticDataSplitterPass(PassRegistry &); void initializeStackMapLivenessPass(PassRegistry &); void initializeStackProtectorPass(PassRegistry &); void initializeStackSafetyGlobalInfoWrapperPassPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 5a4e79d7225db..f4cddafa00971 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -236,6 +236,7 @@ DUMMY_MACHINE_FUNCTION_PASS("livedebugvalues", LiveDebugValuesPass) DUMMY_MACHINE_FUNCTION_PASS("lrshrink", LiveRangeShrinkPass) DUMMY_MACHINE_FUNCTION_PASS("machine-combiner", MachineCombinerPass) DUMMY_MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass) +DUMMY_MACHINE_FUNCTION_PASS("static-data-splitter", StaticDataSplitter) DUMMY_MACHINE_FUNCTION_PASS("machine-function-splitter", MachineFunctionSplitterPass) DUMMY_MACHINE_FUNCTION_PASS("machine-latecleanup", MachineLateInstrsCleanupPass) DUMMY_MACHINE_FUNCTION_PASS("machine-sanmd", MachineSanitizerBinaryMetadata) diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 145fd2fac8b56..88f863d8204d0 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -226,6 +226,7 @@ add_llvm_component_library(LLVMCodeGen StackMaps.cpp StackProtector.cpp StackSlotColoring.cpp + StaticDataSplitter.cpp SwiftErrorValueTracking.cpp SwitchLoweringUtils.cpp TailDuplication.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 8efe540770913..84d92705de022 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -130,6 +130,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeStackMapLivenessPass(Registry); initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); + initializeStaticDataSplitterPass(Registry); initializeStripDebugMachineModulePass(Registry); initializeTailDuplicateLegacyPass(Registry); initializeTargetPassConfigPass(Registry); diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 5ac6472a01e9f..af298c5994fbb 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -1426,6 +1426,10 @@ bool MachineBasicBlock::canSplitCriticalEdge( return true; } +int MachineBasicBlock::getJumpTableIndex() const { + return findJumpTableIndex(*this); +} + /// Prepare MI to be removed from its bundle. This fixes bundle flags on MI's /// neighboring instructions so the bundle won't be broken by removing MI. static void unbundleSingleMI(MachineInstr *MI) { diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index e6b9538fe9a02..b5a89f3bcf42f 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -1291,6 +1291,10 @@ const unsigned MachineFunction::DebugOperandMemNumber = 1000000; // MachineJumpTableInfo implementation //===----------------------------------------------------------------------===// +MachineJumpTableEntry::MachineJumpTableEntry( + const std::vector &MBBs) + : MBBs(MBBs), Hotness(DataHotness::Unknown) {} + /// Return the size of each entry in the jump table. unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const { // The size of a jump table entry is 4 bytes unless the entry is just the @@ -1340,6 +1344,15 @@ unsigned MachineJumpTableInfo::createJumpTableIndex( return JumpTables.size()-1; } +void MachineJumpTableInfo::updateJumpTableHotness(size_t JTI, + DataHotness Hotness) { + assert(JTI < JumpTables.size() && "Invalid JTI!"); + // Note record the largest hotness is important for mergable data (constant + // pools). Even if jump table instances are not merged, record the largest + // value seen fwiw. + JumpTables[JTI].Hotness = std::max(JumpTables[JTI].Hotness, Hotness); +} + /// If Old is the target of any jump tables, update the jump tables to branch /// to New instead. bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old, diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp new file mode 100644 index 0000000000000..14b9c1b3394d2 --- /dev/null +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -0,0 +1,153 @@ +//===- StaticDataSplitter.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass uses profile information to partition static data sections into +// hot and cold ones. It begins to split jump tables based on profile, and +// subsequent patches will handle constant pools and other module internal data. +// +// For the original RFC of this pass please see +// https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744. + +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/CodeGen/MBFIWrapper.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +#define DEBUG_TYPE "static-data-splitter" + +STATISTIC(NumHotJumpTables, "Number of hot jump tables seen"); +STATISTIC(NumColdJumpTables, "Number of cold jump tables seen"); +STATISTIC(NumUnknownJumpTables, + "Number of jump tables with unknown hotness. Such jump tables will " + "be placed in the hot-suffixed section by default."); + +class StaticDataSplitter : public MachineFunctionPass { + const MachineBranchProbabilityInfo *MBPI = nullptr; + const MachineBlockFrequencyInfo *MBFI = nullptr; + const ProfileSummaryInfo *PSI = nullptr; + + // Returns true iff any jump table is hot-cold categorized. + bool splitJumpTables(MachineFunction &MF); + + // Same as above but works on functions with profile information. + bool splitJumpTablesWithProfiles(MachineFunction &MF, + MachineJumpTableInfo &MJTI); + +public: + static char ID; + + StaticDataSplitter() : MachineFunctionPass(ID) { + initializeStaticDataSplitterPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "Static Data Splitter"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) { + MBPI = &getAnalysis().getMBPI(); + MBFI = &getAnalysis().getMBFI(); + PSI = &getAnalysis().getPSI(); + + // Split jump tables based on profile information. Subsequent patches will + // handle other data types like constant pools, module-internal data, etc. + return splitJumpTables(MF); +} + +bool StaticDataSplitter::splitJumpTablesWithProfiles( + MachineFunction &MF, MachineJumpTableInfo &MJTI) { + int NumChangedJumpTables = 0; + // Regard a jump table as hot by default. If the source and all of destination + // blocks are cold, regard the jump table as cold. + DataHotness Hotness = DataHotness::Hot; + for (const auto &MBB : MF) { + // IMPORTANT, `getJumpTableIndex` is a thin wrapper around per-target + // interface `TargetInstrInfo::getjumpTableIndex`, and only X86 implements + // it so far. + const int JTI = MBB.getJumpTableIndex(); + // This is not a source block of jump table. + if (JTI == -1) + continue; + + bool AllBlocksCold = true; + + if (!PSI->isColdBlock(&MBB, MBFI)) + AllBlocksCold = false; + + for (const MachineBasicBlock *MBB : MJTI.getJumpTables()[JTI].MBBs) + if (!PSI->isColdBlock(MBB, MBFI)) + AllBlocksCold = false; + + if (AllBlocksCold) { + Hotness = DataHotness::Cold; + ++NumColdJumpTables; + } else { + ++NumHotJumpTables; + } + + MF.getJumpTableInfo()->updateJumpTableHotness(JTI, Hotness); + ++NumChangedJumpTables; + } + return NumChangedJumpTables > 0; +} + +bool StaticDataSplitter::splitJumpTables(MachineFunction &MF) { + MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + if (!MJTI || MJTI->getJumpTables().empty()) + return false; + + // Place jump tables according to block hotness if block counters are + // available. Check function entry count because BFI depends on it to derive + // block counters. + if (PSI && PSI->hasProfileSummary() && MBFI && + MF.getFunction().getEntryCount()) + return splitJumpTablesWithProfiles(MF, *MJTI); + + // Conservatively place all jump tables in the hot-suffixed section if profile + // information for the function is not available, or the target doesn't + // implement `TargetInstrInfo::getJumpTableIndex` yet. + for (size_t JTI = 0; JTI < MJTI->getJumpTables().size(); JTI++) + MF.getJumpTableInfo()->updateJumpTableHotness(JTI, DataHotness::Hot); + + NumUnknownJumpTables += MJTI->getJumpTables().size(); + return true; +} + +char StaticDataSplitter::ID = 0; + +INITIALIZE_PASS_BEGIN(StaticDataSplitter, DEBUG_TYPE, "Split static data", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_END(StaticDataSplitter, DEBUG_TYPE, "Split static data", false, + false) + +MachineFunctionPass *llvm::createStaticDataSplitterPass() { + return new StaticDataSplitter(); +} diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index d407e9f0871d4..23929672f11d6 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1256,6 +1256,7 @@ void TargetPassConfig::addMachinePasses() { "performance.\n"; } } + addPass(createStaticDataSplitterPass()); addPass(createMachineFunctionSplitterPass()); } // We run the BasicBlockSections pass if either we need BB sections or BB diff --git a/llvm/test/CodeGen/X86/jump-table-partition.ll b/llvm/test/CodeGen/X86/jump-table-partition.ll new file mode 100644 index 0000000000000..3a8f1395f6b28 --- /dev/null +++ b/llvm/test/CodeGen/X86/jump-table-partition.ll @@ -0,0 +1,163 @@ +;; -stats requires asserts +; requires: asserts + +; RUN: llc -stop-after=block-placement %s -o - | llc --run-pass=static-data-splitter -stats -x mir -o - 2>&1 | FileCheck %s --check-prefix=STAT + +; `func_with_hot_jumptable` contains a hot jump table and `func_with_cold_jumptable` contains a cold one. +; `func_without_entry_count` simulates the functions without profile information (e.g., not instrumented or not profiled), +; it's jump table hotness is unknown and regarded as hot conservatively. +; +; Tests stat messages are expected. +; TODO: Update test to verify section suffixes when target-lowering and assembler changes are implemented. +; +; STAT-DAG: 1 static-data-splitter - Number of cold jump tables seen +; STAT-DAG: 1 static-data-splitter - Number of hot jump tables seen +; STAT-DAG: 1 static-data-splitter - Number of jump tables with unknown hotness + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@.str.2 = private constant [7 x i8] c"case 3\00" +@.str.3 = private constant [7 x i8] c"case 4\00" +@.str.4 = private constant [7 x i8] c"case 5\00" +@str.9 = private constant [7 x i8] c"case 2\00" +@str.10 = private constant [7 x i8] c"case 1\00" +@str.11 = private constant [8 x i8] c"default\00" + +define i32 @func_with_hot_jumptable(i32 %num) !prof !13 { +entry: + switch i32 %num, label %sw.default [ + i32 1, label %sw.bb + i32 2, label %sw.bb1 + i32 3, label %sw.bb3 + i32 4, label %sw.bb5 + i32 5, label %sw.bb7 + ], !prof !14 + +sw.bb: ; preds = %entry + %puts11 = tail call i32 @puts(ptr @str.10) + br label %sw.epilog + +sw.bb1: ; preds = %entry + %puts = tail call i32 @puts(ptr @str.9) + br label %sw.epilog + +sw.bb3: ; preds = %entry + %call4 = tail call i32 (ptr, ...) @printf(ptr @.str.2) + br label %sw.bb5 + +sw.bb5: ; preds = %entry, %sw.bb3 + %call6 = tail call i32 (ptr, ...) @printf(ptr @.str.3) + br label %sw.bb7 + +sw.bb7: ; preds = %entry, %sw.bb5 + %call8 = tail call i32 (ptr, ...) @printf(ptr @.str.4) + br label %sw.epilog + +sw.default: ; preds = %entry + %puts12 = tail call i32 @puts(ptr @str.11) + br label %sw.epilog + +sw.epilog: ; preds = %sw.default, %sw.bb7, %sw.bb1, %sw.bb + %div = sdiv i32 %num, 3 + ret i32 %div +} + +define void @func_with_cold_jumptable(i32 %num) !prof !15 { +entry: + switch i32 %num, label %sw.default [ + i32 1, label %sw.bb + i32 2, label %sw.bb1 + i32 3, label %sw.bb3 + i32 4, label %sw.bb5 + i32 5, label %sw.bb7 + ], !prof !16 + +sw.bb: ; preds = %entry + %puts10 = tail call i32 @puts(ptr @str.10) + br label %sw.epilog + +sw.bb1: ; preds = %entry + %puts = tail call i32 @puts(ptr @str.9) + br label %sw.epilog + +sw.bb3: ; preds = %entry + %call4 = tail call i32 (ptr, ...) @printf(ptr @.str.2) + br label %sw.bb5 + +sw.bb5: ; preds = %entry, %sw.bb3 + %call6 = tail call i32 (ptr, ...) @printf(ptr @.str.3) + br label %sw.bb7 + +sw.bb7: ; preds = %entry, %sw.bb5 + %call8 = tail call i32 (ptr, ...) @printf(ptr @.str.4) + br label %sw.epilog + +sw.default: ; preds = %entry + %puts11 = tail call i32 @puts(ptr @str.11) + br label %sw.epilog + +sw.epilog: ; preds = %sw.default, %sw.bb7, %sw.bb1, %sw.bb + ret void +} + +define void @func_without_entry_count(i32 %num) { +entry: + switch i32 %num, label %sw.default [ + i32 1, label %sw.bb + i32 2, label %sw.bb1 + i32 3, label %sw.bb3 + i32 4, label %sw.bb5 + i32 5, label %sw.bb7 + ] + +sw.bb: ; preds = %entry + %puts10 = tail call i32 @puts(ptr @str.10) + br label %sw.epilog + +sw.bb1: ; preds = %entry + %puts = tail call i32 @puts(ptr @str.9) + br label %sw.epilog + +sw.bb3: ; preds = %entry + %call4 = tail call i32 (ptr, ...) @printf(ptr @.str.2) + br label %sw.bb5 + +sw.bb5: ; preds = %entry, %sw.bb3 + %call6 = tail call i32 (ptr, ...) @printf(ptr @.str.3) + br label %sw.bb7 + +sw.bb7: ; preds = %entry, %sw.bb5 + %call8 = tail call i32 (ptr, ...) @printf(ptr @.str.4) + br label %sw.epilog + +sw.default: ; preds = %entry + %puts11 = tail call i32 @puts(ptr @str.11) + br label %sw.epilog + +sw.epilog: ; preds = %sw.default, %sw.bb7, %sw.bb1, %sw.bb + ret void +} + +declare i32 @puts(ptr) +declare i32 @printf(ptr, ...) + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 230002} +!4 = !{!"MaxCount", i64 100000} +!5 = !{!"MaxInternalCount", i64 50000} +!6 = !{!"MaxFunctionCount", i64 100000} +!7 = !{!"NumCounts", i64 14} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12} +!11 = !{i32 990000, i64 10000, i32 7} +!12 = !{i32 999999, i64 1, i32 9} +!13 = !{!"function_entry_count", i64 100000} +!14 = !{!"branch_weights", i32 50000, i32 10000, i32 10000, i32 10000, i32 10000, i32 10000} +!15 = !{!"function_entry_count", i64 1} +!16 = !{!"branch_weights", i32 1, i32 0, i32 0, i32 0, i32 0, i32 0} From 34b6b9b45564d994844cc9610edddf026a0e49cc Mon Sep 17 00:00:00 2001 From: mingmingl Date: Thu, 9 Jan 2025 11:09:45 -0800 Subject: [PATCH 2/9] Flag-gate the new pass and resolve review feedback --- llvm/lib/CodeGen/StaticDataSplitter.cpp | 11 ++++++----- llvm/lib/CodeGen/TargetPassConfig.cpp | 8 +++++++- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index 14b9c1b3394d2..482a61027cf98 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -83,7 +83,10 @@ bool StaticDataSplitter::splitJumpTablesWithProfiles( MachineFunction &MF, MachineJumpTableInfo &MJTI) { int NumChangedJumpTables = 0; // Regard a jump table as hot by default. If the source and all of destination - // blocks are cold, regard the jump table as cold. + // blocks are cold, regard the jump table as cold. While a destination block + // does not read a jump table (unless it's also a source block), a hot + // destination heuristically makes its jump table hot to accommodate for + // potential profile data skews (from sampled profiles, for example). DataHotness Hotness = DataHotness::Hot; for (const auto &MBB : MF) { // IMPORTANT, `getJumpTableIndex` is a thin wrapper around per-target @@ -121,11 +124,9 @@ bool StaticDataSplitter::splitJumpTables(MachineFunction &MF) { if (!MJTI || MJTI->getJumpTables().empty()) return false; - // Place jump tables according to block hotness if block counters are - // available. Check function entry count because BFI depends on it to derive - // block counters. + // Place jump tables according to block hotness if function has profile data. if (PSI && PSI->hasProfileSummary() && MBFI && - MF.getFunction().getEntryCount()) + MF.getFunction().hasProfileData()) return splitJumpTablesWithProfiles(MF, *MJTI); // Conservatively place all jump tables in the hot-suffixed section if profile diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 23929672f11d6..6a964c0910fc6 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -263,6 +263,11 @@ static cl::opt GCEmptyBlocks("gc-empty-basic-blocks", cl::init(false), cl::Hidden, cl::desc("Enable garbage-collecting empty basic blocks")); +static cl::opt + SplitStaticData("split-static-data", cl::Hidden, cl::init(false), + cl::desc("Split static data sections into hot and cold " + "section ones using profile information")); + /// Allow standard passes to be disabled by command line options. This supports /// simple binary flags that either suppress the pass or do nothing. /// i.e. -disable-mypass=false has no effect. @@ -1256,8 +1261,9 @@ void TargetPassConfig::addMachinePasses() { "performance.\n"; } } - addPass(createStaticDataSplitterPass()); addPass(createMachineFunctionSplitterPass()); + if (SplitStaticData) + addPass(createStaticDataSplitterPass()); } // We run the BasicBlockSections pass if either we need BB sections or BB // address map (or both). From 1bacc51a5ae03ca9bda60dda4a63de9944d62950 Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Fri, 10 Jan 2025 17:11:29 -0800 Subject: [PATCH 3/9] Apply suggestions from code review Co-authored-by: Ellis Hoag --- llvm/lib/CodeGen/MachineFunction.cpp | 2 +- llvm/lib/CodeGen/StaticDataSplitter.cpp | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index b5a89f3bcf42f..117bd988a40af 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -1347,7 +1347,7 @@ unsigned MachineJumpTableInfo::createJumpTableIndex( void MachineJumpTableInfo::updateJumpTableHotness(size_t JTI, DataHotness Hotness) { assert(JTI < JumpTables.size() && "Invalid JTI!"); - // Note record the largest hotness is important for mergable data (constant + // Note: recording the largest hotness is important for mergable data (constant // pools). Even if jump table instances are not merged, record the largest // value seen fwiw. JumpTables[JTI].Hotness = std::max(JumpTables[JTI].Hotness, Hotness); diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index 482a61027cf98..55c4b20b76102 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -97,10 +97,7 @@ bool StaticDataSplitter::splitJumpTablesWithProfiles( if (JTI == -1) continue; - bool AllBlocksCold = true; - - if (!PSI->isColdBlock(&MBB, MBFI)) - AllBlocksCold = false; + bool AllBlocksCold = PSI->isColdBlock(&MBB, MBFI); for (const MachineBasicBlock *MBB : MJTI.getJumpTables()[JTI].MBBs) if (!PSI->isColdBlock(MBB, MBFI)) From 8a85d1aa36e22619231c5e079f369c499dd67f6a Mon Sep 17 00:00:00 2001 From: mingmingl Date: Fri, 10 Jan 2025 17:17:14 -0800 Subject: [PATCH 4/9] resolve review feedback --- llvm/include/llvm/CodeGen/MachineFunction.h | 2 +- .../llvm/CodeGen/MachineJumpTableInfo.h | 9 +- llvm/include/llvm/CodeGen/Passes.h | 2 +- llvm/lib/CodeGen/MachineFunction.cpp | 14 +- llvm/lib/CodeGen/StaticDataSplitter.cpp | 88 +++++--- llvm/test/CodeGen/X86/jump-table-partition.ll | 212 ++++++++++-------- 6 files changed, 184 insertions(+), 143 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index c0f983d1c6787..dcdbcaec168d2 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -91,7 +91,7 @@ template <> struct ilist_callback_traits { // The hotness of static data tracked by a MachineFunction and not represented // as a global object in the module IR / MIR. Typical examples are // MachineJumpTableInfo and MachineConstantPool. -enum class DataHotness { +enum class MachineFunctionDataHotness { Unknown, Cold, Hot, diff --git a/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h b/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h index cc1f54a81b9bb..e3675d6489b35 100644 --- a/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h +++ b/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h @@ -28,7 +28,7 @@ namespace llvm { class MachineBasicBlock; class DataLayout; class raw_ostream; -enum class DataHotness; +enum class MachineFunctionDataHotness; /// MachineJumpTableEntry - One jump table in the jump table info. /// @@ -36,7 +36,7 @@ struct MachineJumpTableEntry { /// MBBs - The vector of basic blocks from which to create the jump table. std::vector MBBs; - DataHotness Hotness; + MachineFunctionDataHotness Hotness; explicit MachineJumpTableEntry(const std::vector &M); }; @@ -109,7 +109,10 @@ class MachineJumpTableInfo { return JumpTables; } - void updateJumpTableHotness(size_t JTI, DataHotness Hotness); + // Update machine jump table entry's hotness. Return true if the hotness is + // updated. + bool updateJumpTableEntryHotness(size_t JTI, + MachineFunctionDataHotness Hotness); /// RemoveJumpTable - Mark the specific index as being dead. This will /// prevent it from being emitted. diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 16423d03ff701..b5d2a7e6bf035 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -71,7 +71,7 @@ namespace llvm { /// using profile information. MachineFunctionPass *createMachineFunctionSplitterPass(); - /// createStaticDataSplitterPass - This pass partions static data sections + /// createStaticDataSplitterPass - This pass partitions a static data section /// into a hot and cold section using profile information. MachineFunctionPass *createStaticDataSplitterPass(); diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 117bd988a40af..d09e93d79aae6 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -1293,7 +1293,7 @@ const unsigned MachineFunction::DebugOperandMemNumber = 1000000; MachineJumpTableEntry::MachineJumpTableEntry( const std::vector &MBBs) - : MBBs(MBBs), Hotness(DataHotness::Unknown) {} + : MBBs(MBBs), Hotness(MachineFunctionDataHotness::Unknown) {} /// Return the size of each entry in the jump table. unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const { @@ -1344,13 +1344,17 @@ unsigned MachineJumpTableInfo::createJumpTableIndex( return JumpTables.size()-1; } -void MachineJumpTableInfo::updateJumpTableHotness(size_t JTI, - DataHotness Hotness) { +bool MachineJumpTableInfo::updateJumpTableEntryHotness( + size_t JTI, MachineFunctionDataHotness Hotness) { assert(JTI < JumpTables.size() && "Invalid JTI!"); - // Note: recording the largest hotness is important for mergable data (constant + // Note record the largest hotness is important for mergable data (constant // pools). Even if jump table instances are not merged, record the largest // value seen fwiw. - JumpTables[JTI].Hotness = std::max(JumpTables[JTI].Hotness, Hotness); + if (Hotness <= JumpTables[JTI].Hotness) + return false; + + JumpTables[JTI].Hotness = Hotness; + return true; } /// If Old is the target of any jump tables, update the jump tables to branch diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index 55c4b20b76102..82673e851a681 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -6,13 +6,16 @@ // //===----------------------------------------------------------------------===// // -// This pass uses profile information to partition static data sections into -// hot and cold ones. It begins to split jump tables based on profile, and -// subsequent patches will handle constant pools and other module internal data. +// The pass uses branch profile data to assign hotness based section qualifiers +// for the following types of static data: +// - Jump tables +// - Constant pools (TODO) +// - Other module-internal data (TODO) // // For the original RFC of this pass please see -// https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744. +// https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744 +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/MBFIWrapper.h" @@ -35,8 +38,16 @@ using namespace llvm; STATISTIC(NumHotJumpTables, "Number of hot jump tables seen"); STATISTIC(NumColdJumpTables, "Number of cold jump tables seen"); STATISTIC(NumUnknownJumpTables, - "Number of jump tables with unknown hotness. Such jump tables will " - "be placed in the hot-suffixed section by default."); + "Number of jump tables with unknown hotness. Option " + "-static-data-default-hotness specifies the hotness."); + +static cl::opt StaticDataDefaultHotness( + "static-data-default-hotness", cl::Hidden, + cl::desc("This option specifies the hotness of static data when profile " + "information is unavailable"), + cl::init(MachineFunctionDataHotness::Hot), + cl::values(clEnumValN(MachineFunctionDataHotness::Hot, "hot", "Hot"), + clEnumValN(MachineFunctionDataHotness::Cold, "cold", "Cold"))); class StaticDataSplitter : public MachineFunctionPass { const MachineBranchProbabilityInfo *MBPI = nullptr; @@ -74,20 +85,13 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) { MBFI = &getAnalysis().getMBFI(); PSI = &getAnalysis().getPSI(); - // Split jump tables based on profile information. Subsequent patches will - // handle other data types like constant pools, module-internal data, etc. return splitJumpTables(MF); } bool StaticDataSplitter::splitJumpTablesWithProfiles( MachineFunction &MF, MachineJumpTableInfo &MJTI) { int NumChangedJumpTables = 0; - // Regard a jump table as hot by default. If the source and all of destination - // blocks are cold, regard the jump table as cold. While a destination block - // does not read a jump table (unless it's also a source block), a hot - // destination heuristically makes its jump table hot to accommodate for - // potential profile data skews (from sampled profiles, for example). - DataHotness Hotness = DataHotness::Hot; + for (const auto &MBB : MF) { // IMPORTANT, `getJumpTableIndex` is a thin wrapper around per-target // interface `TargetInstrInfo::getjumpTableIndex`, and only X86 implements @@ -97,21 +101,14 @@ bool StaticDataSplitter::splitJumpTablesWithProfiles( if (JTI == -1) continue; - bool AllBlocksCold = PSI->isColdBlock(&MBB, MBFI); + auto Hotness = MachineFunctionDataHotness::Hot; - for (const MachineBasicBlock *MBB : MJTI.getJumpTables()[JTI].MBBs) - if (!PSI->isColdBlock(MBB, MBFI)) - AllBlocksCold = false; + // Hotness is based on source basic block hotness. + if (PSI->isColdBlock(&MBB, MBFI)) + Hotness = MachineFunctionDataHotness::Cold; - if (AllBlocksCold) { - Hotness = DataHotness::Cold; - ++NumColdJumpTables; - } else { - ++NumHotJumpTables; - } - - MF.getJumpTableInfo()->updateJumpTableHotness(JTI, Hotness); - ++NumChangedJumpTables; + if (MF.getJumpTableInfo()->updateJumpTableEntryHotness(JTI, Hotness)) + ++NumChangedJumpTables; } return NumChangedJumpTables > 0; } @@ -121,18 +118,41 @@ bool StaticDataSplitter::splitJumpTables(MachineFunction &MF) { if (!MJTI || MJTI->getJumpTables().empty()) return false; + const bool ProfileAvailable = PSI && PSI->hasProfileSummary() && MBFI && + MF.getFunction().hasProfileData(); + auto statOnExit = llvm::make_scope_exit([&] { + if (!AreStatisticsEnabled()) + return; + + if (!ProfileAvailable) { + NumUnknownJumpTables += MJTI->getJumpTables().size(); + return; + } + + for (size_t JTI = 0; JTI < MJTI->getJumpTables().size(); JTI++) { + auto Hotness = MJTI->getJumpTables()[JTI].Hotness; + if (Hotness == MachineFunctionDataHotness::Hot) + NumHotJumpTables++; + else { + assert(Hotness == MachineFunctionDataHotness::Cold && + "A jump table is either hot or cold when profile information is " + "available."); + NumColdJumpTables++; + } + } + }); + // Place jump tables according to block hotness if function has profile data. - if (PSI && PSI->hasProfileSummary() && MBFI && - MF.getFunction().hasProfileData()) + if (ProfileAvailable) return splitJumpTablesWithProfiles(MF, *MJTI); - // Conservatively place all jump tables in the hot-suffixed section if profile - // information for the function is not available, or the target doesn't - // implement `TargetInstrInfo::getJumpTableIndex` yet. + // If function profile is unavailable (e.g., module not instrumented, or new + // code paths lacking samples), -static-data-default-hotness specifies the + // hotness. for (size_t JTI = 0; JTI < MJTI->getJumpTables().size(); JTI++) - MF.getJumpTableInfo()->updateJumpTableHotness(JTI, DataHotness::Hot); + MF.getJumpTableInfo()->updateJumpTableEntryHotness( + JTI, StaticDataDefaultHotness); - NumUnknownJumpTables += MJTI->getJumpTables().size(); return true; } diff --git a/llvm/test/CodeGen/X86/jump-table-partition.ll b/llvm/test/CodeGen/X86/jump-table-partition.ll index 3a8f1395f6b28..1b717451a86e2 100644 --- a/llvm/test/CodeGen/X86/jump-table-partition.ll +++ b/llvm/test/CodeGen/X86/jump-table-partition.ll @@ -1,146 +1,160 @@ -;; -stats requires asserts +; -stats requires asserts ; requires: asserts -; RUN: llc -stop-after=block-placement %s -o - | llc --run-pass=static-data-splitter -stats -x mir -o - 2>&1 | FileCheck %s --check-prefix=STAT +; Stop after 'finalize-isel' for simpler MIR, and lower the minimum number of +; jump table entries so 'switch' needs fewer cases to generate a jump table. +; RUN: llc -stop-after=finalize-isel -min-jump-table-entries=2 %s -o %t.mir +; RUN: llc --run-pass=static-data-splitter -stats -x mir %t.mir -o - 2>&1 | FileCheck %s --check-prefix=STAT -; `func_with_hot_jumptable` contains a hot jump table and `func_with_cold_jumptable` contains a cold one. -; `func_without_entry_count` simulates the functions without profile information (e.g., not instrumented or not profiled), -; it's jump table hotness is unknown and regarded as hot conservatively. -; ; Tests stat messages are expected. ; TODO: Update test to verify section suffixes when target-lowering and assembler changes are implemented. -; -; STAT-DAG: 1 static-data-splitter - Number of cold jump tables seen -; STAT-DAG: 1 static-data-splitter - Number of hot jump tables seen +; TODO: Also run static-data-splitter pass with -static-data-default-hotness=cold and check data section suffix. + +; STAT-DAG: 2 static-data-splitter - Number of cold jump tables seen +; STAT-DAG: 2 static-data-splitter - Number of hot jump tables seen ; STAT-DAG: 1 static-data-splitter - Number of jump tables with unknown hotness +; In function @foo, the 2 switch instructions to jt0.* and jt2.* get lowered to hot jump tables, +; and the 2 switch instructions to jt1.* and jt3.* get lowered to cold jump tables. + +; @func_without_profile doesn't have profiles. It's jump table hotness is unknown. + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -@.str.2 = private constant [7 x i8] c"case 3\00" -@.str.3 = private constant [7 x i8] c"case 4\00" -@.str.4 = private constant [7 x i8] c"case 5\00" -@str.9 = private constant [7 x i8] c"case 2\00" -@str.10 = private constant [7 x i8] c"case 1\00" -@str.11 = private constant [8 x i8] c"default\00" +@str.9 = private constant [7 x i8] c".str.9\00" +@str.10 = private constant [8 x i8] c".str.10\00" +@str.11 = private constant [8 x i8] c".str.11\00" -define i32 @func_with_hot_jumptable(i32 %num) !prof !13 { +@case2 = private constant [7 x i8] c"case 2\00" +@case1 = private constant [7 x i8] c"case 1\00" +@default = private constant [8 x i8] c"default\00" +@jt3 = private constant [4 x i8] c"jt3\00" + +define i32 @foo(i32 %num) !prof !13 { entry: - switch i32 %num, label %sw.default [ - i32 1, label %sw.bb - i32 2, label %sw.bb1 - i32 3, label %sw.bb3 - i32 4, label %sw.bb5 - i32 5, label %sw.bb7 + %mod3 = sdiv i32 %num, 3 + switch i32 %mod3, label %jt0.default [ + i32 1, label %jt0.bb1 + i32 2, label %jt0.bb2 ], !prof !14 -sw.bb: ; preds = %entry - %puts11 = tail call i32 @puts(ptr @str.10) - br label %sw.epilog +jt0.bb1: + call i32 @puts(ptr @case1) + br label %jt0.epilog -sw.bb1: ; preds = %entry - %puts = tail call i32 @puts(ptr @str.9) - br label %sw.epilog +jt0.bb2: + call i32 @puts(ptr @case2) + br label %jt0.epilog -sw.bb3: ; preds = %entry - %call4 = tail call i32 (ptr, ...) @printf(ptr @.str.2) - br label %sw.bb5 +jt0.default: + call i32 @puts(ptr @default) + br label %jt0.epilog -sw.bb5: ; preds = %entry, %sw.bb3 - %call6 = tail call i32 (ptr, ...) @printf(ptr @.str.3) - br label %sw.bb7 +jt0.epilog: + %zero = icmp eq i32 %num, 0 + br i1 %zero, label %hot, label %cold, !prof !15 -sw.bb7: ; preds = %entry, %sw.bb5 - %call8 = tail call i32 (ptr, ...) @printf(ptr @.str.4) - br label %sw.epilog +hot: + %c2 = call i32 @transform(i32 %num) + switch i32 %c2, label %jt2.default [ + i32 1, label %jt2.bb1 + i32 2, label %jt2.bb2 + ], !prof !14 -sw.default: ; preds = %entry - %puts12 = tail call i32 @puts(ptr @str.11) - br label %sw.epilog +jt2.bb1: + call i32 @puts(ptr @case1) + br label %jt1.epilog -sw.epilog: ; preds = %sw.default, %sw.bb7, %sw.bb1, %sw.bb - %div = sdiv i32 %num, 3 - ret i32 %div -} +jt2.bb2: + call i32 @puts(ptr @case2) + br label %jt1.epilog -define void @func_with_cold_jumptable(i32 %num) !prof !15 { -entry: - switch i32 %num, label %sw.default [ - i32 1, label %sw.bb - i32 2, label %sw.bb1 - i32 3, label %sw.bb3 - i32 4, label %sw.bb5 - i32 5, label %sw.bb7 - ], !prof !16 +jt2.default: + call i32 @puts(ptr @default) + br label %jt2.epilog -sw.bb: ; preds = %entry - %puts10 = tail call i32 @puts(ptr @str.10) - br label %sw.epilog +jt2.epilog: + %c2cmp = icmp ne i32 %c2, 0 + br i1 %c2cmp, label %return, label %jt3.prologue, !prof !16 -sw.bb1: ; preds = %entry - %puts = tail call i32 @puts(ptr @str.9) - br label %sw.epilog +cold: + %c1 = call i32 @compute(i32 %num) + switch i32 %c1, label %jt1.default [ + i32 1, label %jt1.bb1 + i32 2, label %jt1.bb2 + ], !prof !14 -sw.bb3: ; preds = %entry - %call4 = tail call i32 (ptr, ...) @printf(ptr @.str.2) - br label %sw.bb5 +jt1.bb1: + call i32 @puts(ptr @case1) + br label %jt1.epilog -sw.bb5: ; preds = %entry, %sw.bb3 - %call6 = tail call i32 (ptr, ...) @printf(ptr @.str.3) - br label %sw.bb7 +jt1.bb2: + call i32 @puts(ptr @case2) + br label %jt1.epilog -sw.bb7: ; preds = %entry, %sw.bb5 - %call8 = tail call i32 (ptr, ...) @printf(ptr @.str.4) - br label %sw.epilog +jt1.default: + call i32 @puts(ptr @default) + br label %jt1.epilog -sw.default: ; preds = %entry - %puts11 = tail call i32 @puts(ptr @str.11) - br label %sw.epilog +jt1.epilog: + br label %return -sw.epilog: ; preds = %sw.default, %sw.bb7, %sw.bb1, %sw.bb - ret void +jt3.prologue: + %c3 = call i32 @cleanup(i32 %num) + switch i32 %c3, label %jt3.default [ + i32 1, label %jt3.bb1 + i32 2, label %jt3.bb2 + ], !prof !14 + +jt3.bb1: + call i32 @puts(ptr @case1) + br label %jt3.epilog + +jt3.bb2: + call i32 @puts(ptr @case2) + br label %jt3.epilog + +jt3.default: + call i32 @puts(ptr @default) + br label %jt3.epilog + +jt3.epilog: + call i32 @puts(ptr @jt3) + br label %return + +return: + ret i32 %mod3 } -define void @func_without_entry_count(i32 %num) { +define void @func_without_profile(i32 %num) { entry: switch i32 %num, label %sw.default [ i32 1, label %sw.bb i32 2, label %sw.bb1 - i32 3, label %sw.bb3 - i32 4, label %sw.bb5 - i32 5, label %sw.bb7 ] -sw.bb: ; preds = %entry - %puts10 = tail call i32 @puts(ptr @str.10) +sw.bb: + call i32 @puts(ptr @str.10) br label %sw.epilog -sw.bb1: ; preds = %entry - %puts = tail call i32 @puts(ptr @str.9) - br label %sw.epilog - -sw.bb3: ; preds = %entry - %call4 = tail call i32 (ptr, ...) @printf(ptr @.str.2) - br label %sw.bb5 - -sw.bb5: ; preds = %entry, %sw.bb3 - %call6 = tail call i32 (ptr, ...) @printf(ptr @.str.3) - br label %sw.bb7 - -sw.bb7: ; preds = %entry, %sw.bb5 - %call8 = tail call i32 (ptr, ...) @printf(ptr @.str.4) +sw.bb1: + call i32 @puts(ptr @str.9) br label %sw.epilog -sw.default: ; preds = %entry - %puts11 = tail call i32 @puts(ptr @str.11) +sw.default: + call i32 @puts(ptr @str.11) br label %sw.epilog -sw.epilog: ; preds = %sw.default, %sw.bb7, %sw.bb1, %sw.bb +sw.epilog: ret void } declare i32 @puts(ptr) declare i32 @printf(ptr, ...) +declare i32 @compute(i32) +declare i32 @transform(i32) +declare i32 @cleanup(i32) !llvm.module.flags = !{!0} @@ -158,6 +172,6 @@ declare i32 @printf(ptr, ...) !11 = !{i32 990000, i64 10000, i32 7} !12 = !{i32 999999, i64 1, i32 9} !13 = !{!"function_entry_count", i64 100000} -!14 = !{!"branch_weights", i32 50000, i32 10000, i32 10000, i32 10000, i32 10000, i32 10000} -!15 = !{!"function_entry_count", i64 1} -!16 = !{!"branch_weights", i32 1, i32 0, i32 0, i32 0, i32 0, i32 0} +!14 = !{!"branch_weights", i32 60000, i32 20000, i32 20000} +!15 = !{!"branch_weights", i32 1, i32 99999} +!16 = !{!"branch_weights", i32 99998, i32 1} From e54dacbbbf94538674868b7a8ae1a86dccac44fb Mon Sep 17 00:00:00 2001 From: mingmingl Date: Sun, 12 Jan 2025 22:20:27 -0800 Subject: [PATCH 5/9] Discover jump table by calling MachineOperand::isJTI() --- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 6 --- llvm/lib/CodeGen/MachineBasicBlock.cpp | 4 -- llvm/lib/CodeGen/StaticDataSplitter.cpp | 40 +++++++++++-------- 3 files changed, 24 insertions(+), 26 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 9ed356c2ab233..7fe33c3913f2d 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -997,12 +997,6 @@ class MachineBasicBlock /// no changes occurred in the meantime. bool canSplitCriticalEdge(const MachineBasicBlock *Succ) const; - /// Return an index for MachineJumpTableInfo if \p this basic block ends with - /// an indirect jump using a jump table, otherwise -1. - /// This function is a thin wrapper and forward calls to the per-target method - /// `TargetInstrInfo::getjumpTableIndex`. - int getJumpTableIndex() const; - void pop_front() { Insts.pop_front(); } void pop_back() { Insts.pop_back(); } void push_back(MachineInstr *MI) { Insts.push_back(MI); } diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index af298c5994fbb..5ac6472a01e9f 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -1426,10 +1426,6 @@ bool MachineBasicBlock::canSplitCriticalEdge( return true; } -int MachineBasicBlock::getJumpTableIndex() const { - return findJumpTableIndex(*this); -} - /// Prepare MI to be removed from its bundle. This fixes bundle flags on MI's /// neighboring instructions so the bundle won't be broken by removing MI. static void unbundleSingleMI(MachineInstr *MI) { diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index 82673e851a681..2277a15ea6e3e 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -92,23 +92,31 @@ bool StaticDataSplitter::splitJumpTablesWithProfiles( MachineFunction &MF, MachineJumpTableInfo &MJTI) { int NumChangedJumpTables = 0; + // Jump table could be used by either terminating instructions or + // non-terminating ones, so we walk all instructions and use + // `MachineOperand::isJTI()` to identify jump table operands. + // Similarly, `MachineOperand::isCPI()` can identify constant pool usages + // in the same loop. for (const auto &MBB : MF) { - // IMPORTANT, `getJumpTableIndex` is a thin wrapper around per-target - // interface `TargetInstrInfo::getjumpTableIndex`, and only X86 implements - // it so far. - const int JTI = MBB.getJumpTableIndex(); - // This is not a source block of jump table. - if (JTI == -1) - continue; - - auto Hotness = MachineFunctionDataHotness::Hot; - - // Hotness is based on source basic block hotness. - if (PSI->isColdBlock(&MBB, MBFI)) - Hotness = MachineFunctionDataHotness::Cold; - - if (MF.getJumpTableInfo()->updateJumpTableEntryHotness(JTI, Hotness)) - ++NumChangedJumpTables; + for (const MachineInstr &I : MBB) { + for (const MachineOperand &Op : I.operands()) { + if (!Op.isJTI()) + continue; + const int JTI = Op.getIndex(); + // This is not a source block of jump table. + if (JTI == -1) + continue; + + auto Hotness = MachineFunctionDataHotness::Hot; + + // Hotness is based on source basic block hotness. + if (PSI->isColdBlock(&MBB, MBFI)) + Hotness = MachineFunctionDataHotness::Cold; + + if (MF.getJumpTableInfo()->updateJumpTableEntryHotness(JTI, Hotness)) + ++NumChangedJumpTables; + } + } } return NumChangedJumpTables > 0; } From 366bbbfc88bc0a0a32e434ff0f4b13ca9444cef3 Mon Sep 17 00:00:00 2001 From: mingmingl Date: Thu, 16 Jan 2025 16:40:41 -0800 Subject: [PATCH 6/9] resolve comments --- llvm/lib/CodeGen/StaticDataSplitter.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index 2277a15ea6e3e..2fd77e99229c6 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -110,6 +110,8 @@ bool StaticDataSplitter::splitJumpTablesWithProfiles( auto Hotness = MachineFunctionDataHotness::Hot; // Hotness is based on source basic block hotness. + // TODO: PSI APIs are about instruction hotness. Introduce API for data + // access hotness. if (PSI->isColdBlock(&MBB, MBFI)) Hotness = MachineFunctionDataHotness::Cold; From c8c122ce598cff17053a1ccada51b546914c0b73 Mon Sep 17 00:00:00 2001 From: mingmingl Date: Tue, 21 Jan 2025 10:28:57 -0800 Subject: [PATCH 7/9] resolve comments --- llvm/lib/CodeGen/StaticDataSplitter.cpp | 2 +- llvm/test/CodeGen/X86/jump-table-partition.ll | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index 2fd77e99229c6..5a192b22b1dab 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -115,7 +115,7 @@ bool StaticDataSplitter::splitJumpTablesWithProfiles( if (PSI->isColdBlock(&MBB, MBFI)) Hotness = MachineFunctionDataHotness::Cold; - if (MF.getJumpTableInfo()->updateJumpTableEntryHotness(JTI, Hotness)) + if (MJTI.updateJumpTableEntryHotness(JTI, Hotness)) ++NumChangedJumpTables; } } diff --git a/llvm/test/CodeGen/X86/jump-table-partition.ll b/llvm/test/CodeGen/X86/jump-table-partition.ll index 1b717451a86e2..681135a5fb2bd 100644 --- a/llvm/test/CodeGen/X86/jump-table-partition.ll +++ b/llvm/test/CodeGen/X86/jump-table-partition.ll @@ -14,8 +14,8 @@ ; STAT-DAG: 2 static-data-splitter - Number of hot jump tables seen ; STAT-DAG: 1 static-data-splitter - Number of jump tables with unknown hotness -; In function @foo, the 2 switch instructions to jt0.* and jt2.* get lowered to hot jump tables, -; and the 2 switch instructions to jt1.* and jt3.* get lowered to cold jump tables. +; In function @foo, the 2 switch instructions to jt0.* and jt1.* get lowered to hot jump tables, +; and the 2 switch instructions to jt2.* and jt3.* get lowered to cold jump tables. ; @func_without_profile doesn't have profiles. It's jump table hotness is unknown. @@ -53,9 +53,9 @@ jt0.default: jt0.epilog: %zero = icmp eq i32 %num, 0 - br i1 %zero, label %hot, label %cold, !prof !15 + br i1 %zero, label %cold, label %hot, !prof !15 -hot: +cold: %c2 = call i32 @transform(i32 %num) switch i32 %c2, label %jt2.default [ i32 1, label %jt2.bb1 @@ -78,7 +78,7 @@ jt2.epilog: %c2cmp = icmp ne i32 %c2, 0 br i1 %c2cmp, label %return, label %jt3.prologue, !prof !16 -cold: +hot: %c1 = call i32 @compute(i32 %num) switch i32 %c1, label %jt1.default [ i32 1, label %jt1.bb1 From e5e0cee20c24c8a926d083b1aae6800db1b18459 Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Tue, 21 Jan 2025 13:07:23 -0800 Subject: [PATCH 8/9] Apply suggestions from code review Co-authored-by: Ellis Hoag --- llvm/lib/CodeGen/StaticDataSplitter.cpp | 8 ++++---- llvm/lib/CodeGen/TargetPassConfig.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index 5a192b22b1dab..fc5e2d10599be 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -141,13 +141,13 @@ bool StaticDataSplitter::splitJumpTables(MachineFunction &MF) { for (size_t JTI = 0; JTI < MJTI->getJumpTables().size(); JTI++) { auto Hotness = MJTI->getJumpTables()[JTI].Hotness; - if (Hotness == MachineFunctionDataHotness::Hot) - NumHotJumpTables++; - else { + if (Hotness == MachineFunctionDataHotness::Hot) { + ++NumHotJumpTables; + } else { assert(Hotness == MachineFunctionDataHotness::Cold && "A jump table is either hot or cold when profile information is " "available."); - NumColdJumpTables++; + ++NumColdJumpTables; } } }); diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 6a964c0910fc6..bca1eb07deb13 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -266,7 +266,7 @@ static cl::opt static cl::opt SplitStaticData("split-static-data", cl::Hidden, cl::init(false), cl::desc("Split static data sections into hot and cold " - "section ones using profile information")); + "sections using profile information")); /// Allow standard passes to be disabled by command line options. This supports /// simple binary flags that either suppress the pass or do nothing. From 89c80adf9348a06185c185bdd2763eb6d39b78f2 Mon Sep 17 00:00:00 2001 From: mingmingl Date: Wed, 22 Jan 2025 17:56:08 -0800 Subject: [PATCH 9/9] resolve comments --- llvm/include/llvm/CodeGen/MachineJumpTableInfo.h | 2 ++ llvm/lib/CodeGen/MachineFunction.cpp | 4 +--- llvm/lib/CodeGen/StaticDataSplitter.cpp | 4 ++-- llvm/test/CodeGen/X86/jump-table-partition.ll | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h b/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h index e3675d6489b35..56ecbe22ff6dd 100644 --- a/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h +++ b/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h @@ -36,6 +36,8 @@ struct MachineJumpTableEntry { /// MBBs - The vector of basic blocks from which to create the jump table. std::vector MBBs; + /// The hotness of MJTE is inferred from the hotness of the source basic + /// block(s) that reference it. MachineFunctionDataHotness Hotness; explicit MachineJumpTableEntry(const std::vector &M); diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index d09e93d79aae6..faff2eca5080c 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -1347,9 +1347,7 @@ unsigned MachineJumpTableInfo::createJumpTableIndex( bool MachineJumpTableInfo::updateJumpTableEntryHotness( size_t JTI, MachineFunctionDataHotness Hotness) { assert(JTI < JumpTables.size() && "Invalid JTI!"); - // Note record the largest hotness is important for mergable data (constant - // pools). Even if jump table instances are not merged, record the largest - // value seen fwiw. + // Record the largest hotness value. if (Hotness <= JumpTables[JTI].Hotness) return false; diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index fc5e2d10599be..25f02fde8a4b8 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -58,7 +58,7 @@ class StaticDataSplitter : public MachineFunctionPass { bool splitJumpTables(MachineFunction &MF); // Same as above but works on functions with profile information. - bool splitJumpTablesWithProfiles(MachineFunction &MF, + bool splitJumpTablesWithProfiles(const MachineFunction &MF, MachineJumpTableInfo &MJTI); public: @@ -89,7 +89,7 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) { } bool StaticDataSplitter::splitJumpTablesWithProfiles( - MachineFunction &MF, MachineJumpTableInfo &MJTI) { + const MachineFunction &MF, MachineJumpTableInfo &MJTI) { int NumChangedJumpTables = 0; // Jump table could be used by either terminating instructions or diff --git a/llvm/test/CodeGen/X86/jump-table-partition.ll b/llvm/test/CodeGen/X86/jump-table-partition.ll index 681135a5fb2bd..c85338de0c3d4 100644 --- a/llvm/test/CodeGen/X86/jump-table-partition.ll +++ b/llvm/test/CodeGen/X86/jump-table-partition.ll @@ -7,8 +7,8 @@ ; RUN: llc --run-pass=static-data-splitter -stats -x mir %t.mir -o - 2>&1 | FileCheck %s --check-prefix=STAT ; Tests stat messages are expected. -; TODO: Update test to verify section suffixes when target-lowering and assembler changes are implemented. -; TODO: Also run static-data-splitter pass with -static-data-default-hotness=cold and check data section suffix. +; COM: Update test to verify section suffixes when target-lowering and assembler changes are implemented. +; COM: Also run static-data-splitter pass with -static-data-default-hotness=cold and check data section suffix. ; STAT-DAG: 2 static-data-splitter - Number of cold jump tables seen ; STAT-DAG: 2 static-data-splitter - Number of hot jump tables seen