| //===- bolt/Profile/YAMLProfileWriter.cpp - YAML profile serializer -------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "bolt/Profile/YAMLProfileWriter.h" |
| #include "bolt/Core/BinaryBasicBlock.h" |
| #include "bolt/Core/BinaryFunction.h" |
| #include "bolt/Profile/BoltAddressTranslation.h" |
| #include "bolt/Profile/DataAggregator.h" |
| #include "bolt/Profile/ProfileReaderBase.h" |
| #include "bolt/Rewrite/RewriteInstance.h" |
| #include "bolt/Utils/CommandLineOpts.h" |
| #include "llvm/MC/MCPseudoProbe.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/FileSystem.h" |
| #include "llvm/Support/raw_ostream.h" |
| |
| #undef DEBUG_TYPE |
| #define DEBUG_TYPE "bolt-prof" |
| |
| namespace opts { |
| using namespace llvm; |
| extern cl::opt<bool> ProfileUseDFS; |
| cl::opt<bool> ProfileWritePseudoProbes( |
| "profile-write-pseudo-probes", |
| cl::desc("Use pseudo probes in profile generation"), cl::Hidden, |
| cl::cat(BoltOptCategory)); |
| } // namespace opts |
| |
| namespace llvm { |
| namespace bolt { |
| |
| const BinaryFunction *YAMLProfileWriter::setCSIDestination( |
| const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI, |
| const MCSymbol *Symbol, const BoltAddressTranslation *BAT, |
| uint32_t Offset) { |
| CSI.DestId = 0; // designated for unknown functions |
| CSI.EntryDiscriminator = 0; |
| |
| if (Symbol) { |
| uint64_t EntryID = 0; |
| if (const BinaryFunction *Callee = |
| BC.getFunctionForSymbol(Symbol, &EntryID)) { |
| if (BAT && BAT->isBATFunction(Callee->getAddress())) |
| std::tie(Callee, EntryID) = BAT->translateSymbol(BC, *Symbol, Offset); |
| else if (const BinaryBasicBlock *BB = |
| Callee->getBasicBlockContainingOffset(Offset)) |
| BC.getFunctionForSymbol(Callee->getSecondaryEntryPointSymbol(*BB), |
| &EntryID); |
| CSI.DestId = Callee->getFunctionNumber(); |
| CSI.EntryDiscriminator = EntryID; |
| return Callee; |
| } |
| } |
| return nullptr; |
| } |
| |
| std::vector<YAMLProfileWriter::InlineTreeNode> |
| YAMLProfileWriter::collectInlineTree( |
| const MCPseudoProbeDecoder &Decoder, |
| const MCDecodedPseudoProbeInlineTree &Root) { |
| auto getHash = [&](const MCDecodedPseudoProbeInlineTree &Node) { |
| return Decoder.getFuncDescForGUID(Node.Guid)->FuncHash; |
| }; |
| std::vector<InlineTreeNode> InlineTree( |
| {InlineTreeNode{&Root, Root.Guid, getHash(Root), 0, 0}}); |
| uint32_t ParentId = 0; |
| while (ParentId != InlineTree.size()) { |
| const MCDecodedPseudoProbeInlineTree *Cur = InlineTree[ParentId].InlineTree; |
| for (const MCDecodedPseudoProbeInlineTree &Child : Cur->getChildren()) |
| InlineTree.emplace_back( |
| InlineTreeNode{&Child, Child.Guid, getHash(Child), ParentId, |
| std::get<1>(Child.getInlineSite())}); |
| ++ParentId; |
| } |
| |
| return InlineTree; |
| } |
| |
| std::tuple<yaml::bolt::ProfilePseudoProbeDesc, |
| YAMLProfileWriter::InlineTreeDesc> |
| YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) { |
| yaml::bolt::ProfilePseudoProbeDesc Desc; |
| InlineTreeDesc InlineTree; |
| |
| for (const MCDecodedPseudoProbeInlineTree &TopLev : |
| Decoder.getDummyInlineRoot().getChildren()) |
| InlineTree.TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev; |
| |
| for (const auto &FuncDesc : Decoder.getGUID2FuncDescMap()) |
| ++InlineTree.HashIdxMap[FuncDesc.FuncHash]; |
| |
| InlineTree.GUIDIdxMap.reserve(Decoder.getGUID2FuncDescMap().size()); |
| for (const auto &Node : Decoder.getInlineTreeVec()) |
| ++InlineTree.GUIDIdxMap[Node.Guid]; |
| |
| std::vector<std::pair<uint32_t, uint64_t>> GUIDFreqVec; |
| GUIDFreqVec.reserve(InlineTree.GUIDIdxMap.size()); |
| for (const auto [GUID, Cnt] : InlineTree.GUIDIdxMap) |
| GUIDFreqVec.emplace_back(Cnt, GUID); |
| llvm::sort(GUIDFreqVec); |
| |
| std::vector<std::pair<uint32_t, uint64_t>> HashFreqVec; |
| HashFreqVec.reserve(InlineTree.HashIdxMap.size()); |
| for (const auto [Hash, Cnt] : InlineTree.HashIdxMap) |
| HashFreqVec.emplace_back(Cnt, Hash); |
| llvm::sort(HashFreqVec); |
| |
| uint32_t Index = 0; |
| Desc.Hash.reserve(HashFreqVec.size()); |
| for (uint64_t Hash : llvm::make_second_range(llvm::reverse(HashFreqVec))) { |
| Desc.Hash.emplace_back(Hash); |
| InlineTree.HashIdxMap[Hash] = Index++; |
| } |
| |
| Index = 0; |
| Desc.GUID.reserve(GUIDFreqVec.size()); |
| for (uint64_t GUID : llvm::make_second_range(llvm::reverse(GUIDFreqVec))) { |
| Desc.GUID.emplace_back(GUID); |
| InlineTree.GUIDIdxMap[GUID] = Index++; |
| uint64_t Hash = Decoder.getFuncDescForGUID(GUID)->FuncHash; |
| Desc.GUIDHashIdx.emplace_back(InlineTree.HashIdxMap[Hash]); |
| } |
| |
| return {Desc, InlineTree}; |
| } |
| |
| std::vector<yaml::bolt::PseudoProbeInfo> |
| YAMLProfileWriter::convertNodeProbes(NodeIdToProbes &NodeProbes) { |
| struct BlockProbeInfoHasher { |
| size_t operator()(const yaml::bolt::PseudoProbeInfo &BPI) const { |
| auto HashCombine = [](auto &Range) { |
| return llvm::hash_combine_range(Range.begin(), Range.end()); |
| }; |
| return llvm::hash_combine(HashCombine(BPI.BlockProbes), |
| HashCombine(BPI.CallProbes), |
| HashCombine(BPI.IndCallProbes)); |
| } |
| }; |
| |
| // Check identical BlockProbeInfo structs and merge them |
| std::unordered_map<yaml::bolt::PseudoProbeInfo, std::vector<uint32_t>, |
| BlockProbeInfoHasher> |
| BPIToNodes; |
| for (auto &[NodeId, Probes] : NodeProbes) { |
| yaml::bolt::PseudoProbeInfo BPI; |
| BPI.BlockProbes = std::vector(Probes[0].begin(), Probes[0].end()); |
| BPI.IndCallProbes = std::vector(Probes[1].begin(), Probes[1].end()); |
| BPI.CallProbes = std::vector(Probes[2].begin(), Probes[2].end()); |
| BPIToNodes[BPI].push_back(NodeId); |
| } |
| |
| auto handleMask = [](const auto &Ids, auto &Vec, auto &Mask) { |
| for (auto Id : Ids) |
| if (Id > 64) |
| Vec.emplace_back(Id); |
| else |
| Mask |= 1ull << (Id - 1); |
| }; |
| |
| // Add to YAML with merged nodes/block mask optimizations |
| std::vector<yaml::bolt::PseudoProbeInfo> YamlProbes; |
| YamlProbes.reserve(BPIToNodes.size()); |
| for (const auto &[BPI, Nodes] : BPIToNodes) { |
| auto &YamlBPI = YamlProbes.emplace_back(yaml::bolt::PseudoProbeInfo()); |
| YamlBPI.CallProbes = BPI.CallProbes; |
| YamlBPI.IndCallProbes = BPI.IndCallProbes; |
| if (Nodes.size() == 1) |
| YamlBPI.InlineTreeIndex = Nodes.front(); |
| else |
| YamlBPI.InlineTreeNodes = Nodes; |
| handleMask(BPI.BlockProbes, YamlBPI.BlockProbes, YamlBPI.BlockMask); |
| } |
| return YamlProbes; |
| } |
| |
| std::tuple<std::vector<yaml::bolt::InlineTreeNode>, |
| YAMLProfileWriter::InlineTreeMapTy> |
| YAMLProfileWriter::convertBFInlineTree(const MCPseudoProbeDecoder &Decoder, |
| const InlineTreeDesc &InlineTree, |
| uint64_t GUID) { |
| DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId; |
| std::vector<yaml::bolt::InlineTreeNode> YamlInlineTree; |
| auto It = InlineTree.TopLevelGUIDToInlineTree.find(GUID); |
| if (It == InlineTree.TopLevelGUIDToInlineTree.end()) |
| return {YamlInlineTree, InlineTreeNodeId}; |
| const MCDecodedPseudoProbeInlineTree *Root = It->second; |
| assert(Root && "Malformed TopLevelGUIDToInlineTree"); |
| uint32_t Index = 0; |
| uint32_t PrevParent = 0; |
| uint32_t PrevGUIDIdx = 0; |
| for (const auto &Node : collectInlineTree(Decoder, *Root)) { |
| InlineTreeNodeId[Node.InlineTree] = Index++; |
| auto GUIDIdxIt = InlineTree.GUIDIdxMap.find(Node.GUID); |
| assert(GUIDIdxIt != InlineTree.GUIDIdxMap.end() && "Malformed GUIDIdxMap"); |
| uint32_t GUIDIdx = GUIDIdxIt->second; |
| if (GUIDIdx == PrevGUIDIdx) |
| GUIDIdx = UINT32_MAX; |
| else |
| PrevGUIDIdx = GUIDIdx; |
| YamlInlineTree.emplace_back(yaml::bolt::InlineTreeNode{ |
| Node.ParentId - PrevParent, Node.InlineSite, GUIDIdx, 0, 0}); |
| PrevParent = Node.ParentId; |
| } |
| return {YamlInlineTree, InlineTreeNodeId}; |
| } |
| |
| yaml::bolt::BinaryFunctionProfile |
| YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS, |
| const InlineTreeDesc &InlineTree, |
| const BoltAddressTranslation *BAT) { |
| yaml::bolt::BinaryFunctionProfile YamlBF; |
| const BinaryContext &BC = BF.getBinaryContext(); |
| const MCPseudoProbeDecoder *PseudoProbeDecoder = |
| opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr; |
| |
| const uint16_t LBRProfile = BF.getProfileFlags() & BinaryFunction::PF_LBR; |
| |
| // Prepare function and block hashes |
| BF.computeHash(UseDFS); |
| BF.computeBlockHashes(); |
| |
| YamlBF.Name = DataAggregator::getLocationName(BF, BAT); |
| YamlBF.Id = BF.getFunctionNumber(); |
| YamlBF.Hash = BF.getHash(); |
| YamlBF.NumBasicBlocks = BF.size(); |
| YamlBF.ExecCount = BF.getKnownExecutionCount(); |
| DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId; |
| if (PseudoProbeDecoder && BF.getGUID()) { |
| std::tie(YamlBF.InlineTree, InlineTreeNodeId) = |
| convertBFInlineTree(*PseudoProbeDecoder, InlineTree, BF.getGUID()); |
| } |
| |
| BinaryFunction::BasicBlockOrderType Order; |
| llvm::copy(UseDFS ? BF.dfs() : BF.getLayout().blocks(), |
| std::back_inserter(Order)); |
| |
| const FunctionLayout Layout = BF.getLayout(); |
| Layout.updateLayoutIndices(Order); |
| |
| for (const BinaryBasicBlock *BB : Order) { |
| yaml::bolt::BinaryBasicBlockProfile YamlBB; |
| YamlBB.Index = BB->getLayoutIndex(); |
| YamlBB.NumInstructions = BB->getNumNonPseudos(); |
| YamlBB.Hash = BB->getHash(); |
| |
| if (!LBRProfile) { |
| YamlBB.EventCount = BB->getKnownExecutionCount(); |
| if (YamlBB.EventCount) |
| YamlBF.Blocks.emplace_back(YamlBB); |
| continue; |
| } |
| |
| YamlBB.ExecCount = BB->getKnownExecutionCount(); |
| |
| for (const MCInst &Instr : *BB) { |
| if (!BC.MIB->isCall(Instr) && !BC.MIB->isIndirectBranch(Instr)) |
| continue; |
| |
| SmallVector<std::pair<StringRef, yaml::bolt::CallSiteInfo>> CSTargets; |
| yaml::bolt::CallSiteInfo CSI; |
| std::optional<uint32_t> Offset = BC.MIB->getOffset(Instr); |
| if (!Offset || *Offset < BB->getInputOffset()) |
| continue; |
| CSI.Offset = *Offset - BB->getInputOffset(); |
| |
| if (BC.MIB->isIndirectCall(Instr) || BC.MIB->isIndirectBranch(Instr)) { |
| const auto ICSP = BC.MIB->tryGetAnnotationAs<IndirectCallSiteProfile>( |
| Instr, "CallProfile"); |
| if (!ICSP) |
| continue; |
| for (const IndirectCallProfile &CSP : ICSP.get()) { |
| StringRef TargetName = ""; |
| const BinaryFunction *Callee = |
| setCSIDestination(BC, CSI, CSP.Symbol, BAT); |
| if (Callee) |
| TargetName = Callee->getOneName(); |
| CSI.Count = CSP.Count; |
| CSI.Mispreds = CSP.Mispreds; |
| CSTargets.emplace_back(TargetName, CSI); |
| } |
| } else { // direct call or a tail call |
| StringRef TargetName = ""; |
| const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(Instr); |
| const BinaryFunction *const Callee = |
| setCSIDestination(BC, CSI, CalleeSymbol, BAT); |
| if (Callee) |
| TargetName = Callee->getOneName(); |
| |
| auto getAnnotationWithDefault = [&](const MCInst &Inst, StringRef Ann) { |
| return BC.MIB->getAnnotationWithDefault(Instr, Ann, 0ull); |
| }; |
| if (BC.MIB->getConditionalTailCall(Instr)) { |
| CSI.Count = getAnnotationWithDefault(Instr, "CTCTakenCount"); |
| CSI.Mispreds = getAnnotationWithDefault(Instr, "CTCMispredCount"); |
| } else { |
| CSI.Count = getAnnotationWithDefault(Instr, "Count"); |
| } |
| |
| if (CSI.Count) |
| CSTargets.emplace_back(TargetName, CSI); |
| } |
| // Sort targets in a similar way to getBranchData, see Location::operator< |
| llvm::sort(CSTargets, [](const auto &RHS, const auto &LHS) { |
| if (RHS.first != LHS.first) |
| return RHS.first < LHS.first; |
| return RHS.second.Offset < LHS.second.Offset; |
| }); |
| for (auto &KV : CSTargets) |
| YamlBB.CallSites.push_back(KV.second); |
| } |
| |
| // Skip printing if there's no profile data for non-entry basic block. |
| // Include landing pads with non-zero execution count. |
| if (YamlBB.CallSites.empty() && !BB->isEntryPoint() && |
| !(BB->isLandingPad() && BB->getKnownExecutionCount() != 0)) { |
| // Include blocks having successors or predecessors with positive counts. |
| uint64_t SuccessorExecCount = 0; |
| for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo : |
| BB->branch_info()) |
| SuccessorExecCount += BranchInfo.Count; |
| uint64_t PredecessorExecCount = 0; |
| for (auto Pred : BB->predecessors()) |
| PredecessorExecCount += Pred->getBranchInfo(*BB).Count; |
| if (!SuccessorExecCount && !PredecessorExecCount) |
| continue; |
| } |
| |
| auto BranchInfo = BB->branch_info_begin(); |
| for (const BinaryBasicBlock *Successor : BB->successors()) { |
| yaml::bolt::SuccessorInfo YamlSI; |
| YamlSI.Index = Successor->getLayoutIndex(); |
| YamlSI.Count = BranchInfo->Count; |
| YamlSI.Mispreds = BranchInfo->MispredictedCount; |
| |
| YamlBB.Successors.emplace_back(YamlSI); |
| |
| ++BranchInfo; |
| } |
| |
| if (PseudoProbeDecoder) { |
| const AddressProbesMap &ProbeMap = |
| PseudoProbeDecoder->getAddress2ProbesMap(); |
| const uint64_t FuncAddr = BF.getAddress(); |
| const std::pair<uint64_t, uint64_t> &BlockRange = |
| BB->getInputAddressRange(); |
| const std::pair<uint64_t, uint64_t> BlockAddrRange = { |
| FuncAddr + BlockRange.first, FuncAddr + BlockRange.second}; |
| auto Probes = ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second); |
| YamlBB.PseudoProbes = writeBlockProbes(Probes, InlineTreeNodeId); |
| } |
| |
| YamlBF.Blocks.emplace_back(YamlBB); |
| } |
| return YamlBF; |
| } |
| |
| std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) { |
| const BinaryContext &BC = RI.getBinaryContext(); |
| const auto &Functions = BC.getBinaryFunctions(); |
| |
| std::error_code EC; |
| OS = std::make_unique<raw_fd_ostream>(Filename, EC, sys::fs::OF_None); |
| if (EC) { |
| errs() << "BOLT-WARNING: " << EC.message() << " : unable to open " |
| << Filename << " for output.\n"; |
| return EC; |
| } |
| |
| yaml::bolt::BinaryProfile BP; |
| |
| // Fill out the header info. |
| BP.Header.Version = 1; |
| BP.Header.FileName = std::string(BC.getFilename()); |
| std::optional<StringRef> BuildID = BC.getFileBuildID(); |
| BP.Header.Id = BuildID ? std::string(*BuildID) : "<unknown>"; |
| BP.Header.Origin = std::string(RI.getProfileReader()->getReaderName()); |
| BP.Header.IsDFSOrder = opts::ProfileUseDFS; |
| BP.Header.HashFunction = HashFunction::Default; |
| |
| StringSet<> EventNames = RI.getProfileReader()->getEventNames(); |
| if (!EventNames.empty()) { |
| std::string Sep; |
| for (const StringMapEntry<std::nullopt_t> &EventEntry : EventNames) { |
| BP.Header.EventNames += Sep + EventEntry.first().str(); |
| Sep = ","; |
| } |
| } |
| |
| // Make sure the profile is consistent across all functions. |
| uint16_t ProfileFlags = BinaryFunction::PF_NONE; |
| for (const auto &BFI : Functions) { |
| const BinaryFunction &BF = BFI.second; |
| if (BF.hasProfile() && !BF.empty()) { |
| assert(BF.getProfileFlags() != BinaryFunction::PF_NONE); |
| if (ProfileFlags == BinaryFunction::PF_NONE) |
| ProfileFlags = BF.getProfileFlags(); |
| |
| assert(BF.getProfileFlags() == ProfileFlags && |
| "expected consistent profile flags across all functions"); |
| } |
| } |
| BP.Header.Flags = ProfileFlags; |
| |
| // Add probe inline tree nodes. |
| InlineTreeDesc InlineTree; |
| if (const MCPseudoProbeDecoder *Decoder = |
| opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr) |
| std::tie(BP.PseudoProbeDesc, InlineTree) = convertPseudoProbeDesc(*Decoder); |
| |
| // Add all function objects. |
| for (const auto &BFI : Functions) { |
| const BinaryFunction &BF = BFI.second; |
| if (BF.hasProfile()) { |
| if (!BF.hasValidProfile() && !RI.getProfileReader()->isTrustedSource()) |
| continue; |
| |
| BP.Functions.emplace_back(convert(BF, opts::ProfileUseDFS, InlineTree)); |
| } |
| } |
| |
| // Write the profile. |
| yaml::Output Out(*OS, nullptr, 0); |
| Out << BP; |
| |
| return std::error_code(); |
| } |
| |
| } // namespace bolt |
| } // namespace llvm |