diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 182d5ff049c37..3a1eae3311bd7 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -75,6 +75,14 @@ enum IndirectCallPromotionType : char { ICP_ALL /// Perform ICP on calls and jump tables. }; +/// Hash functions supported for BF/BB hashing. +enum class HashFunction : char { + StdHash, /// std::hash, implementation is platform-dependent. Provided for + /// backwards compatibility. + XXH3, /// llvm::xxh3_64bits, the default. + Default = XXH3, +}; + /// Information on a single indirect call to a particular callee. struct IndirectCallProfile { MCSymbol *Symbol; @@ -2234,18 +2242,21 @@ class BinaryFunction { /// /// If \p UseDFS is set, process basic blocks in DFS order. Otherwise, use /// the existing layout order. + /// \p HashFunction specifies which function is used for BF hashing. /// /// By default, instruction operands are ignored while calculating the hash. /// The caller can change this via passing \p OperandHashFunc function. /// The return result of this function will be mixed with internal hash. size_t computeHash( - bool UseDFS = false, + bool UseDFS = false, HashFunction HashFunction = HashFunction::Default, OperandHashFuncTy OperandHashFunc = [](const MCOperand &) { return std::string(); }) const; /// Compute hash values for each block of the function. - void computeBlockHashes() const; + /// \p HashFunction specifies which function is used for BB hashing. + void + computeBlockHashes(HashFunction HashFunction = HashFunction::Default) const; void setDWARFUnit(DWARFUnit *Unit) { DwarfUnit = Unit; } diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h index 2218a167a74ec..548b528ae2d65 100644 --- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h +++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h @@ -178,6 +178,14 @@ template <> struct ScalarBitSetTraits { } }; +template <> struct ScalarEnumerationTraits { + using HashFunction = llvm::bolt::HashFunction; + static void enumeration(IO &io, HashFunction &value) { + io.enumCase(value, "std-hash", HashFunction::StdHash); + io.enumCase(value, "xxh3", HashFunction::XXH3); + } +}; + namespace bolt { struct BinaryProfileHeader { uint32_t Version{1}; @@ -188,6 +196,7 @@ struct BinaryProfileHeader { std::string Origin; // How the profile was obtained. std::string EventNames; // Events used for sample profile. bool IsDFSOrder{true}; // Whether using DFS block order in function profile + llvm::bolt::HashFunction HashFunction; // Hash used for BB/BF hashing }; } // end namespace bolt @@ -200,6 +209,8 @@ template <> struct MappingTraits { YamlIO.mapOptional("profile-origin", Header.Origin); YamlIO.mapOptional("profile-events", Header.EventNames); YamlIO.mapOptional("dfs-order", Header.IsDFSOrder); + YamlIO.mapOptional("hash-func", Header.HashFunction, + llvm::bolt::HashFunction::StdHash); } }; diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index be033cf076688..0ac47a53a4467 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -3633,7 +3633,7 @@ BinaryFunction::BasicBlockListType BinaryFunction::dfs() const { return DFS; } -size_t BinaryFunction::computeHash(bool UseDFS, +size_t BinaryFunction::computeHash(bool UseDFS, HashFunction HashFunction, OperandHashFuncTy OperandHashFunc) const { if (size() == 0) return 0; @@ -3652,7 +3652,13 @@ size_t BinaryFunction::computeHash(bool UseDFS, for (const BinaryBasicBlock *BB : Order) HashString.append(hashBlock(BC, *BB, OperandHashFunc)); - return Hash = llvm::xxh3_64bits(HashString); + switch (HashFunction) { + case HashFunction::StdHash: + return Hash = std::hash{}(HashString); + case HashFunction::XXH3: + return Hash = llvm::xxh3_64bits(HashString); + } + llvm_unreachable("Unhandled HashFunction"); } void BinaryFunction::insertBasicBlocks( diff --git a/bolt/lib/Passes/IdenticalCodeFolding.cpp b/bolt/lib/Passes/IdenticalCodeFolding.cpp index b4ec89ca8fd79..dfbc72e48e5d2 100644 --- a/bolt/lib/Passes/IdenticalCodeFolding.cpp +++ b/bolt/lib/Passes/IdenticalCodeFolding.cpp @@ -360,9 +360,9 @@ void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { // Pre-compute hash before pushing into hashtable. // Hash instruction operands to minimize hash collisions. - BF.computeHash(opts::ICFUseDFS, [&BC](const MCOperand &Op) { - return hashInstOperand(BC, Op); - }); + BF.computeHash( + opts::ICFUseDFS, HashFunction::Default, + [&BC](const MCOperand &Op) { return hashInstOperand(BC, Op); }); }; ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp index 6fb6f380f71ee..26180f1321477 100644 --- a/bolt/lib/Profile/StaleProfileMatching.cpp +++ b/bolt/lib/Profile/StaleProfileMatching.cpp @@ -225,7 +225,7 @@ class StaleMatcher { std::unordered_map> OpHashToBlocks; }; -void BinaryFunction::computeBlockHashes() const { +void BinaryFunction::computeBlockHashes(HashFunction HashFunction) const { if (size() == 0) return; @@ -241,12 +241,26 @@ void BinaryFunction::computeBlockHashes() const { // Hashing complete instructions. std::string InstrHashStr = hashBlock( BC, *BB, [&](const MCOperand &Op) { return hashInstOperand(BC, Op); }); - uint64_t InstrHash = llvm::xxh3_64bits(InstrHashStr); - BlendedHashes[I].InstrHash = (uint16_t)InstrHash; + if (HashFunction == HashFunction::StdHash) { + uint64_t InstrHash = std::hash{}(InstrHashStr); + BlendedHashes[I].InstrHash = (uint16_t)hash_value(InstrHash); + } else if (HashFunction == HashFunction::XXH3) { + uint64_t InstrHash = llvm::xxh3_64bits(InstrHashStr); + BlendedHashes[I].InstrHash = (uint16_t)InstrHash; + } else { + llvm_unreachable("Unhandled HashFunction"); + } // Hashing opcodes. std::string OpcodeHashStr = hashBlockLoose(BC, *BB); - OpcodeHashes[I] = llvm::xxh3_64bits(OpcodeHashStr); - BlendedHashes[I].OpcodeHash = (uint16_t)OpcodeHashes[I]; + if (HashFunction == HashFunction::StdHash) { + OpcodeHashes[I] = std::hash{}(OpcodeHashStr); + BlendedHashes[I].OpcodeHash = (uint16_t)hash_value(OpcodeHashes[I]); + } else if (HashFunction == HashFunction::XXH3) { + OpcodeHashes[I] = llvm::xxh3_64bits(OpcodeHashStr); + BlendedHashes[I].OpcodeHash = (uint16_t)OpcodeHashes[I]; + } else { + llvm_unreachable("Unhandled HashFunction"); + } } // Initialize neighbor hash. @@ -258,7 +272,12 @@ void BinaryFunction::computeBlockHashes() const { uint64_t SuccHash = OpcodeHashes[SuccBB->getIndex()]; Hash = hashing::detail::hash_16_bytes(Hash, SuccHash); } - BlendedHashes[I].SuccHash = (uint8_t)Hash; + if (HashFunction == HashFunction::StdHash) { + // Compatibility with old behavior. + BlendedHashes[I].SuccHash = (uint8_t)hash_value(Hash); + } else { + BlendedHashes[I].SuccHash = (uint8_t)Hash; + } // Append hashes of predecessors. Hash = 0; @@ -266,7 +285,12 @@ void BinaryFunction::computeBlockHashes() const { uint64_t PredHash = OpcodeHashes[PredBB->getIndex()]; Hash = hashing::detail::hash_16_bytes(Hash, PredHash); } - BlendedHashes[I].PredHash = (uint8_t)Hash; + if (HashFunction == HashFunction::StdHash) { + // Compatibility with old behavior. + BlendedHashes[I].PredHash = (uint8_t)hash_value(Hash); + } else { + BlendedHashes[I].PredHash = (uint8_t)Hash; + } } // Assign hashes. @@ -682,7 +706,7 @@ bool YAMLProfileReader::inferStaleProfile( << "\"" << BF.getPrintName() << "\"\n"); // Make sure that block hashes are up to date. - BF.computeBlockHashes(); + BF.computeBlockHashes(YamlBP.Header.HashFunction); const BinaryFunction::BasicBlockOrderType BlockOrder( BF.getLayout().block_begin(), BF.getLayout().block_end()); diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 079cb352d36e7..ade562ef6fb11 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -83,6 +83,7 @@ bool YAMLProfileReader::parseFunctionProfile( BinaryContext &BC = BF.getBinaryContext(); const bool IsDFSOrder = YamlBP.Header.IsDFSOrder; + const HashFunction HashFunction = YamlBP.Header.HashFunction; bool ProfileMatched = true; uint64_t MismatchedBlocks = 0; uint64_t MismatchedCalls = 0; @@ -98,7 +99,8 @@ bool YAMLProfileReader::parseFunctionProfile( FuncRawBranchCount += YamlSI.Count; BF.setRawBranchCount(FuncRawBranchCount); - if (!opts::IgnoreHash && YamlBF.Hash != BF.computeHash(IsDFSOrder)) { + if (!opts::IgnoreHash && + YamlBF.Hash != BF.computeHash(IsDFSOrder, HashFunction)) { if (opts::Verbosity >= 1) errs() << "BOLT-WARNING: function hash mismatch\n"; ProfileMatched = false; @@ -326,6 +328,17 @@ bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) { } Error YAMLProfileReader::readProfile(BinaryContext &BC) { + if (opts::Verbosity >= 1) { + outs() << "BOLT-INFO: YAML profile with hash: "; + switch (YamlBP.Header.HashFunction) { + case HashFunction::StdHash: + outs() << "std::hash\n"; + break; + case HashFunction::XXH3: + outs() << "xxh3\n"; + break; + } + } YamlProfileToFunction.resize(YamlBP.Functions.size() + 1); auto profileMatches = [](const yaml::bolt::BinaryFunctionProfile &Profile, @@ -348,7 +361,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { // Recompute hash once per function. if (!opts::IgnoreHash) - Function.computeHash(YamlBP.Header.IsDFSOrder); + Function.computeHash(YamlBP.Header.IsDFSOrder, + YamlBP.Header.HashFunction); if (profileMatches(YamlBF, Function)) matchProfileToFunction(YamlBF, Function); diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp index 3326d1d8f5596..dffd851a1d6f7 100644 --- a/bolt/lib/Profile/YAMLProfileWriter.cpp +++ b/bolt/lib/Profile/YAMLProfileWriter.cpp @@ -189,6 +189,7 @@ std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) { BP.Header.Id = BuildID ? std::string(*BuildID) : ""; BP.Header.Origin = std::string(RI.getProfileReader()->getReaderName()); BP.Header.IsDFSOrder = opts::ProfileUseDFS; + BP.Header.HashFunction = HashFunction::Default; StringSet<> EventNames = RI.getProfileReader()->getEventNames(); if (!EventNames.empty()) { diff --git a/bolt/test/X86/Inputs/blarge_profile_stale.std-hash.yaml b/bolt/test/X86/Inputs/blarge_profile_stale.std-hash.yaml new file mode 100644 index 0000000000000..d520a0d242bf0 --- /dev/null +++ b/bolt/test/X86/Inputs/blarge_profile_stale.std-hash.yaml @@ -0,0 +1,56 @@ +--- +header: + profile-version: 1 + binary-name: 'reader-yaml.test.tmp.exe' + binary-build-id: '' + profile-flags: [ lbr ] + profile-origin: branch profile reader + profile-events: '' + dfs-order: false +functions: + - name: SolveCubic + fid: 6 + hash: 0xC6E9098E973BBE19 + exec: 151 + nblocks: 18 + blocks: + - bid: 0 + insns: 43 + hash: 0xed4db287e71c0000 + exec: 151 + succ: [ { bid: 1, cnt: 151, mis: 2 }, { bid: 7, cnt: 0 } ] + - bid: 1 + insns: 7 + hash: 0x39330000e4560088 + succ: [ { bid: 13, cnt: 151 }, { bid: 2, cnt: 0 } ] + - bid: 13 + insns: 26 + hash: 0xa9700000fe202a7 + succ: [ { bid: 3, cnt: 89 }, { bid: 2, cnt: 10 } ] + - bid: 3 + insns: 9 + hash: 0x62391dad18a700a0 + succ: [ { bid: 5, cnt: 151 } ] + - bid: 5 + insns: 9 + hash: 0x4d906d19ecec0111 + - name: usqrt + fid: 7 + hash: 0x8B62B1F9AD81EA35 + exec: 20 + nblocks: 6 + blocks: + - bid: 0 + insns: 4 + hash: 0x1111111111111111 + exec: 20 + succ: [ { bid: 1, cnt: 0 } ] + - bid: 1 + insns: 9 + hash: 0x27e43a5e10cd0010 + succ: [ { bid: 3, cnt: 320, mis: 171 }, { bid: 2, cnt: 0 } ] + - bid: 3 + insns: 2 + hash: 0x4db935b6471e0039 + succ: [ { bid: 1, cnt: 300, mis: 33 }, { bid: 4, cnt: 20 } ] +... diff --git a/bolt/test/X86/Inputs/blarge_profile_stale.yaml b/bolt/test/X86/Inputs/blarge_profile_stale.yaml index 43b75c99656f1..ac46b37b56a12 100644 --- a/bolt/test/X86/Inputs/blarge_profile_stale.yaml +++ b/bolt/test/X86/Inputs/blarge_profile_stale.yaml @@ -7,6 +7,7 @@ header: profile-origin: branch profile reader profile-events: '' dfs-order: false + hash-func: xxh3 functions: - name: SolveCubic fid: 6 diff --git a/bolt/test/X86/reader-stale-yaml-std.test b/bolt/test/X86/reader-stale-yaml-std.test new file mode 100644 index 0000000000000..e0b6ca0645e19 --- /dev/null +++ b/bolt/test/X86/reader-stale-yaml-std.test @@ -0,0 +1,68 @@ +# This script checks that YamlProfileReader in llvm-bolt is reading data +# correctly and stale data is corrected by profile inference. + +RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe +RUN: llvm-bolt %t.exe -o %t.null -b %p/Inputs/blarge_profile_stale.std-hash.yaml \ +RUN: --print-cfg --print-only=usqrt,SolveCubic --infer-stale-profile=1 -v=1 \ +RUN: 2>&1 | FileCheck %s + +# Verify that yaml reader works as expected. +CHECK: pre-processing profile using YAML profile reader +CHECK: BOLT-INFO: YAML profile with hash: std::hash + +# Function "SolveCubic" has stale profile, since there is one jump in the +# profile (from bid=13 to bid=2) which is not in the CFG in the binary. The test +# verifies that the inference is able to match two blocks (bid=1 and bid=13) +# using "loose" hashes and then correctly propagate the counts. + +CHECK: Binary Function "SolveCubic" after building cfg { +CHECK: State : CFG constructed +CHECK: Address : 0x400e00 +CHECK: Size : 0x368 +CHECK: Section : .text +CHECK: IsSimple : 1 +CHECK: BB Count : 18 +CHECK: Exec Count : 151 +CHECK: Branch Count: 552 +CHECK: } +# Verify block counts. +CHECK: .LBB00 (43 instructions, align : 1) +CHECK: Successors: .Ltmp[[#BB07:]] (mispreds: 0, count: 0), .LFT[[#BB01:]] (mispreds: 0, count: 151) +CHECK: .LFT[[#BB01:]] (5 instructions, align : 1) +CHECK: Successors: .Ltmp[[#BB013:]] (mispreds: 0, count: 151), .LFT[[#BB02:]] (mispreds: 0, count: 0) +CHECK: .Ltmp[[#BB03:]] (26 instructions, align : 1) +CHECK: Successors: .Ltmp[[#BB05:]] (mispreds: 0, count: 151), .LFT[[#BB04:]] (mispreds: 0, count: 0) +CHECK: .Ltmp[[#BB05:]] (9 instructions, align : 1) +CHECK: .Ltmp[[#BB013:]] (12 instructions, align : 1) +CHECK: Successors: .Ltmp[[#BB03:]] (mispreds: 0, count: 151) +CHECK: End of Function "SolveCubic" + +# Function "usqrt" has stale profile, since the number of blocks in the profile +# (nblocks=6) does not match the size of the CFG in the binary. The entry +# block (bid=0) has an incorrect (missing) count, which should be inferred by +# the algorithm. + +CHECK: Binary Function "usqrt" after building cfg { +CHECK: State : CFG constructed +CHECK: Address : 0x401170 +CHECK: Size : 0x43 +CHECK: Section : .text +CHECK: IsSimple : 1 +CHECK: BB Count : 5 +CHECK: Exec Count : 20 +CHECK: Branch Count: 640 +CHECK: } +# Verify block counts. +CHECK: .LBB01 (4 instructions, align : 1) +CHECK: Successors: .Ltmp[[#BB113:]] (mispreds: 0, count: 20) +CHECK: .Ltmp[[#BB113:]] (9 instructions, align : 1) +CHECK: Successors: .Ltmp[[#BB112:]] (mispreds: 0, count: 320), .LFT[[#BB10:]] (mispreds: 0, count: 0) +CHECK: .LFT[[#BB10:]] (2 instructions, align : 1) +CHECK: Successors: .Ltmp[[#BB112:]] (mispreds: 0, count: 0) +CHECK: .Ltmp[[#BB112:]] (2 instructions, align : 1) +CHECK: Successors: .Ltmp[[#BB113:]] (mispreds: 0, count: 300), .LFT[[#BB11:]] (mispreds: 0, count: 20) +CHECK: .LFT[[#BB11:]] (2 instructions, align : 1) +CHECK: End of Function "usqrt" +# Check the overall inference stats. +CHECK: 2 out of 7 functions in the binary (28.6%) have non-empty execution profile +CHECK: inferred profile for 2 (100.00% of profiled, 100.00% of stale) functions responsible for {{.*}} samples ({{.*}} out of {{.*}})