| #include "llvm/ProfileData/MemProf.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/ProfileData/InstrProf.h" |
| #include "llvm/ProfileData/SampleProf.h" |
| #include "llvm/Support/BLAKE3.h" |
| #include "llvm/Support/Endian.h" |
| #include "llvm/Support/EndianStream.h" |
| #include "llvm/Support/HashBuilder.h" |
| |
| namespace llvm { |
| namespace memprof { |
| MemProfSchema getFullSchema() { |
| MemProfSchema List; |
| #define MIBEntryDef(NameTag, Name, Type) List.push_back(Meta::Name); |
| #include "llvm/ProfileData/MIBEntryDef.inc" |
| #undef MIBEntryDef |
| return List; |
| } |
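| |
| // For illustration, the X-macro above expands to roughly the following |
| // (sketch only; the exact field list comes from MIBEntryDef.inc): |
| // |
| //   List.push_back(Meta::AllocCount); |
| //   List.push_back(Meta::TotalSize); |
| //   ... one push_back per field declared via MIBEntryDef. |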
| |
| MemProfSchema getHotColdSchema() { |
| return {Meta::AllocCount, Meta::TotalSize, Meta::TotalLifetime, |
| Meta::TotalLifetimeAccessDensity}; |
| } |
| |
| static size_t serializedSizeV0(const IndexedAllocationInfo &IAI, |
| const MemProfSchema &Schema) { |
| size_t Size = 0; |
| // The number of frames to serialize. |
| Size += sizeof(uint64_t); |
| // The callstack frame ids. |
| Size += sizeof(FrameId) * IAI.CallStack.size(); |
| // The size of the payload. |
| Size += PortableMemInfoBlock::serializedSize(Schema); |
| return Size; |
| } |
| |
| static size_t serializedSizeV2(const IndexedAllocationInfo &IAI, |
| const MemProfSchema &Schema) { |
| size_t Size = 0; |
| // The CallStackId |
| Size += sizeof(CallStackId); |
| // The size of the payload. |
| Size += PortableMemInfoBlock::serializedSize(Schema); |
| return Size; |
| } |
| |
| static size_t serializedSizeV3(const IndexedAllocationInfo &IAI, |
| const MemProfSchema &Schema) { |
| size_t Size = 0; |
| // The linear call stack ID. |
| Size += sizeof(LinearCallStackId); |
| // The size of the payload. |
| Size += PortableMemInfoBlock::serializedSize(Schema); |
| return Size; |
| } |
| |
| size_t IndexedAllocationInfo::serializedSize(const MemProfSchema &Schema, |
| IndexedVersion Version) const { |
| switch (Version) { |
| case Version0: |
| case Version1: |
| return serializedSizeV0(*this, Schema); |
| case Version2: |
| return serializedSizeV2(*this, Schema); |
| case Version3: |
| return serializedSizeV3(*this, Schema); |
| } |
| llvm_unreachable("unsupported MemProf version"); |
| } |
| |
| static size_t serializedSizeV0(const IndexedMemProfRecord &Record, |
| const MemProfSchema &Schema) { |
| // The number of alloc sites to serialize. |
| size_t Result = sizeof(uint64_t); |
| for (const IndexedAllocationInfo &N : Record.AllocSites) |
| Result += N.serializedSize(Schema, Version0); |
| |
| // The number of callsites we have information for. |
| Result += sizeof(uint64_t); |
| for (const auto &Frames : Record.CallSites) { |
| // The number of frame ids to serialize. |
| Result += sizeof(uint64_t); |
| Result += Frames.size() * sizeof(FrameId); |
| } |
| return Result; |
| } |
| |
| static size_t serializedSizeV2(const IndexedMemProfRecord &Record, |
| const MemProfSchema &Schema) { |
| // The number of alloc sites to serialize. |
| size_t Result = sizeof(uint64_t); |
| for (const IndexedAllocationInfo &N : Record.AllocSites) |
| Result += N.serializedSize(Schema, Version2); |
| |
| // The number of callsites we have information for. |
| Result += sizeof(uint64_t); |
|   // The CallStackIds. |
| Result += Record.CallSiteIds.size() * sizeof(CallStackId); |
| return Result; |
| } |
| |
| static size_t serializedSizeV3(const IndexedMemProfRecord &Record, |
| const MemProfSchema &Schema) { |
| // The number of alloc sites to serialize. |
| size_t Result = sizeof(uint64_t); |
| for (const IndexedAllocationInfo &N : Record.AllocSites) |
| Result += N.serializedSize(Schema, Version3); |
| |
| // The number of callsites we have information for. |
| Result += sizeof(uint64_t); |
|   // The linear call stack IDs. |
| Result += Record.CallSiteIds.size() * sizeof(LinearCallStackId); |
| return Result; |
| } |
| |
| size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema, |
| IndexedVersion Version) const { |
| switch (Version) { |
| case Version0: |
| case Version1: |
| return serializedSizeV0(*this, Schema); |
| case Version2: |
| return serializedSizeV2(*this, Schema); |
| case Version3: |
| return serializedSizeV3(*this, Schema); |
| } |
| llvm_unreachable("unsupported MemProf version"); |
| } |
| |
| static void serializeV0(const IndexedMemProfRecord &Record, |
| const MemProfSchema &Schema, raw_ostream &OS) { |
| using namespace support; |
| |
| endian::Writer LE(OS, llvm::endianness::little); |
| |
| LE.write<uint64_t>(Record.AllocSites.size()); |
| for (const IndexedAllocationInfo &N : Record.AllocSites) { |
| LE.write<uint64_t>(N.CallStack.size()); |
| for (const FrameId &Id : N.CallStack) |
| LE.write<FrameId>(Id); |
| N.Info.serialize(Schema, OS); |
| } |
| |
| // Related contexts. |
| LE.write<uint64_t>(Record.CallSites.size()); |
| for (const auto &Frames : Record.CallSites) { |
| LE.write<uint64_t>(Frames.size()); |
| for (const FrameId &Id : Frames) |
| LE.write<FrameId>(Id); |
| } |
| } |
| |
| static void serializeV2(const IndexedMemProfRecord &Record, |
| const MemProfSchema &Schema, raw_ostream &OS) { |
| using namespace support; |
| |
| endian::Writer LE(OS, llvm::endianness::little); |
| |
| LE.write<uint64_t>(Record.AllocSites.size()); |
| for (const IndexedAllocationInfo &N : Record.AllocSites) { |
| LE.write<CallStackId>(N.CSId); |
| N.Info.serialize(Schema, OS); |
| } |
| |
| // Related contexts. |
| LE.write<uint64_t>(Record.CallSiteIds.size()); |
| for (const auto &CSId : Record.CallSiteIds) |
| LE.write<CallStackId>(CSId); |
| } |
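| |
| // For reference, a V2 record serialized by the function above has the |
| // following little-endian layout (a sketch; widths follow the types used |
| // above): |
| // |
| //   uint64_t NumAllocSites |
| //   repeated { CallStackId CSId; PortableMemInfoBlock payload per Schema } |
| //   uint64_t NumCallSiteIds |
| //   repeated CallStackId |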
| |
| static void serializeV3( |
| const IndexedMemProfRecord &Record, const MemProfSchema &Schema, |
| raw_ostream &OS, |
| llvm::DenseMap<CallStackId, LinearCallStackId> &MemProfCallStackIndexes) { |
| using namespace support; |
| |
| endian::Writer LE(OS, llvm::endianness::little); |
| |
| LE.write<uint64_t>(Record.AllocSites.size()); |
| for (const IndexedAllocationInfo &N : Record.AllocSites) { |
| assert(MemProfCallStackIndexes.contains(N.CSId)); |
| LE.write<LinearCallStackId>(MemProfCallStackIndexes[N.CSId]); |
| N.Info.serialize(Schema, OS); |
| } |
| |
| // Related contexts. |
| LE.write<uint64_t>(Record.CallSiteIds.size()); |
| for (const auto &CSId : Record.CallSiteIds) { |
| assert(MemProfCallStackIndexes.contains(CSId)); |
| LE.write<LinearCallStackId>(MemProfCallStackIndexes[CSId]); |
| } |
| } |
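| |
| // V3 differs from V2 above only in how call stacks are referenced: instead |
| // of 64-bit CallStackId hashes, it stores LinearCallStackId positions in the |
| // radix tree array built by CallStackRadixTreeBuilder below, roughly: |
| // |
| //   uint64_t NumAllocSites |
| //   repeated { LinearCallStackId Pos; PortableMemInfoBlock payload } |
| //   uint64_t NumCallSiteIds |
| //   repeated LinearCallStackId |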
| |
| void IndexedMemProfRecord::serialize( |
| const MemProfSchema &Schema, raw_ostream &OS, IndexedVersion Version, |
| llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes) |
| const { |
| switch (Version) { |
| case Version0: |
| case Version1: |
| serializeV0(*this, Schema, OS); |
| return; |
| case Version2: |
| serializeV2(*this, Schema, OS); |
| return; |
| case Version3: |
| serializeV3(*this, Schema, OS, *MemProfCallStackIndexes); |
| return; |
| } |
| llvm_unreachable("unsupported MemProf version"); |
| } |
| |
| static IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema, |
| const unsigned char *Ptr) { |
| using namespace support; |
| |
| IndexedMemProfRecord Record; |
| |
| // Read the meminfo nodes. |
| const uint64_t NumNodes = |
| endian::readNext<uint64_t, llvm::endianness::little>(Ptr); |
| for (uint64_t I = 0; I < NumNodes; I++) { |
| IndexedAllocationInfo Node; |
| const uint64_t NumFrames = |
| endian::readNext<uint64_t, llvm::endianness::little>(Ptr); |
| for (uint64_t J = 0; J < NumFrames; J++) { |
| const FrameId Id = |
| endian::readNext<FrameId, llvm::endianness::little>(Ptr); |
| Node.CallStack.push_back(Id); |
| } |
| Node.CSId = hashCallStack(Node.CallStack); |
| Node.Info.deserialize(Schema, Ptr); |
| Ptr += PortableMemInfoBlock::serializedSize(Schema); |
| Record.AllocSites.push_back(Node); |
| } |
| |
| // Read the callsite information. |
| const uint64_t NumCtxs = |
| endian::readNext<uint64_t, llvm::endianness::little>(Ptr); |
| for (uint64_t J = 0; J < NumCtxs; J++) { |
| const uint64_t NumFrames = |
| endian::readNext<uint64_t, llvm::endianness::little>(Ptr); |
| llvm::SmallVector<FrameId> Frames; |
| Frames.reserve(NumFrames); |
| for (uint64_t K = 0; K < NumFrames; K++) { |
| const FrameId Id = |
| endian::readNext<FrameId, llvm::endianness::little>(Ptr); |
| Frames.push_back(Id); |
| } |
| Record.CallSites.push_back(Frames); |
| Record.CallSiteIds.push_back(hashCallStack(Frames)); |
| } |
| |
| return Record; |
| } |
| |
| static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema, |
| const unsigned char *Ptr) { |
| using namespace support; |
| |
| IndexedMemProfRecord Record; |
| |
| // Read the meminfo nodes. |
| const uint64_t NumNodes = |
| endian::readNext<uint64_t, llvm::endianness::little>(Ptr); |
| Record.AllocSites.reserve(NumNodes); |
| for (uint64_t I = 0; I < NumNodes; I++) { |
| IndexedAllocationInfo Node; |
| Node.CSId = endian::readNext<CallStackId, llvm::endianness::little>(Ptr); |
| Node.Info.deserialize(Schema, Ptr); |
| Ptr += PortableMemInfoBlock::serializedSize(Schema); |
| Record.AllocSites.push_back(Node); |
| } |
| |
| // Read the callsite information. |
| const uint64_t NumCtxs = |
| endian::readNext<uint64_t, llvm::endianness::little>(Ptr); |
| Record.CallSiteIds.reserve(NumCtxs); |
| for (uint64_t J = 0; J < NumCtxs; J++) { |
| CallStackId CSId = |
| endian::readNext<CallStackId, llvm::endianness::little>(Ptr); |
| Record.CallSiteIds.push_back(CSId); |
| } |
| |
| return Record; |
| } |
| |
| static IndexedMemProfRecord deserializeV3(const MemProfSchema &Schema, |
| const unsigned char *Ptr) { |
| using namespace support; |
| |
| IndexedMemProfRecord Record; |
| |
| // Read the meminfo nodes. |
| const uint64_t NumNodes = |
| endian::readNext<uint64_t, llvm::endianness::little>(Ptr); |
| Record.AllocSites.reserve(NumNodes); |
| for (uint64_t I = 0; I < NumNodes; I++) { |
|     IndexedAllocationInfo Node; |
|     // V3 references the call stack by a LinearCallStackId, which we store in |
|     // CSId; CallStackId is wide enough to hold it (see the static_assert in |
|     // the call site loop below). |
|     Node.CSId = |
|         endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr); |
| Node.Info.deserialize(Schema, Ptr); |
| Ptr += PortableMemInfoBlock::serializedSize(Schema); |
| Record.AllocSites.push_back(Node); |
| } |
| |
| // Read the callsite information. |
| const uint64_t NumCtxs = |
| endian::readNext<uint64_t, llvm::endianness::little>(Ptr); |
| Record.CallSiteIds.reserve(NumCtxs); |
| for (uint64_t J = 0; J < NumCtxs; J++) { |
| // We are storing LinearCallStackId in CallSiteIds, which is a vector of |
| // CallStackId. Assert that CallStackId is no smaller than |
| // LinearCallStackId. |
| static_assert(sizeof(LinearCallStackId) <= sizeof(CallStackId)); |
| LinearCallStackId CSId = |
| endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr); |
| Record.CallSiteIds.push_back(CSId); |
| } |
| |
| return Record; |
| } |
| |
| IndexedMemProfRecord |
| IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, |
| const unsigned char *Ptr, |
| IndexedVersion Version) { |
| switch (Version) { |
| case Version0: |
| case Version1: |
| return deserializeV0(Schema, Ptr); |
| case Version2: |
| return deserializeV2(Schema, Ptr); |
| case Version3: |
| return deserializeV3(Schema, Ptr); |
| } |
| llvm_unreachable("unsupported MemProf version"); |
| } |
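| |
| // Illustrative round trip through the functions above (hypothetical caller, |
| // sketch only): |
| // |
| //   std::string Buf; |
| //   llvm::raw_string_ostream OS(Buf); |
| //   Record.serialize(Schema, OS, Version2, |
| //                    /*MemProfCallStackIndexes=*/nullptr); |
| //   IndexedMemProfRecord Copy = IndexedMemProfRecord::deserialize( |
| //       Schema, reinterpret_cast<const unsigned char *>(Buf.data()), |
| //       Version2); |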
| |
| MemProfRecord IndexedMemProfRecord::toMemProfRecord( |
| llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const { |
| MemProfRecord Record; |
| |
| Record.AllocSites.reserve(AllocSites.size()); |
| for (const IndexedAllocationInfo &IndexedAI : AllocSites) { |
| AllocationInfo AI; |
| AI.Info = IndexedAI.Info; |
| AI.CallStack = Callback(IndexedAI.CSId); |
| Record.AllocSites.push_back(std::move(AI)); |
| } |
| |
| Record.CallSites.reserve(CallSiteIds.size()); |
| for (CallStackId CSId : CallSiteIds) |
| Record.CallSites.push_back(Callback(CSId)); |
| |
| return Record; |
| } |
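| |
| // Example callback for toMemProfRecord above (hypothetical; a real caller |
| // resolves CSIds from its own tables): |
| // |
| //   llvm::DenseMap<CallStackId, std::vector<Frame>> CSIdToCallStack = ...; |
| //   MemProfRecord Record = IndexedRecord.toMemProfRecord( |
| //       [&](CallStackId CSId) { return CSIdToCallStack.lookup(CSId); }); |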
| |
| GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) { |
|   // Canonicalize the function name to drop suffixes such as ".llvm.". Note |
|   // that we do not drop ".__uniq." suffixes, as getCanonicalFnName does not |
|   // drop those by default. This is deliberate, so that internal linkage |
|   // functions remain distinguishable during matching. Dropping the other |
|   // suffixes lets us match functions in the profile use phase, which runs |
|   // before those suffixes are added. Note that this applies to both |
|   // instrumented and sampled function names. |
| StringRef CanonicalName = |
| sampleprof::FunctionSamples::getCanonicalFnName(FunctionName); |
| |
|   // We use the function GUID, which we expect to be a uint64_t. At this time, |
|   // it is the lower 64 bits of the MD5 hash of the canonical function name. |
| return Function::getGUID(CanonicalName); |
| } |
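| |
| // For example, assuming the default behavior of getCanonicalFnName described |
| // above: |
| // |
| //   getGUID("foo.llvm.123") == getGUID("foo")   // ".llvm." suffix dropped |
| //   getGUID("foo.__uniq.456") != getGUID("foo") // ".__uniq." suffix kept |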
| |
| Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) { |
| using namespace support; |
| |
| const unsigned char *Ptr = Buffer; |
| const uint64_t NumSchemaIds = |
| endian::readNext<uint64_t, llvm::endianness::little>(Ptr); |
| if (NumSchemaIds > static_cast<uint64_t>(Meta::Size)) { |
| return make_error<InstrProfError>(instrprof_error::malformed, |
| "memprof schema invalid"); |
| } |
| |
| MemProfSchema Result; |
| for (size_t I = 0; I < NumSchemaIds; I++) { |
| const uint64_t Tag = |
| endian::readNext<uint64_t, llvm::endianness::little>(Ptr); |
| if (Tag >= static_cast<uint64_t>(Meta::Size)) { |
| return make_error<InstrProfError>(instrprof_error::malformed, |
| "memprof schema invalid"); |
| } |
| Result.push_back(static_cast<Meta>(Tag)); |
| } |
| // Advance the buffer to one past the schema if we succeeded. |
| Buffer = Ptr; |
| return Result; |
| } |
| |
| CallStackId hashCallStack(ArrayRef<FrameId> CS) { |
| llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little> |
| HashBuilder; |
| for (FrameId F : CS) |
| HashBuilder.add(F); |
| llvm::BLAKE3Result<8> Hash = HashBuilder.final(); |
| CallStackId CSId; |
| std::memcpy(&CSId, Hash.data(), sizeof(Hash)); |
| return CSId; |
| } |
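| |
| // Note that hashCallStack is a pure content hash: equal frame sequences |
| // always map to equal IDs, e.g. hashCallStack({F1, F2}) == |
| // hashCallStack({F1, F2}) for any FrameIds F1 and F2. This is what lets |
| // deserializeV0 above recompute CSId and CallSiteIds from the frames it |
| // reads. |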
| |
| // Encode a call stack into RadixArray. Return the starting index within |
| // RadixArray. For each call stack we encode, we emit two or three components |
| // into RadixArray. If a given call stack doesn't have a common prefix relative |
| // to the previous one, we emit: |
| // |
| // - the frames in the given call stack in root-to-leaf order |
| // |
| // - the length of the given call stack |
| // |
| // If a given call stack has a non-empty common prefix relative to the previous |
| // one, we emit: |
| // |
| // - the relative location of the common prefix, encoded as a negative number |
| // |
| // - the portion of the given call stack beyond the common prefix |
| // |
| // - the length of the given call stack, including the length of the common |
| //   prefix |
| // |
| // The resulting RadixArray requires a somewhat unintuitive backward traversal |
| // to reconstruct a call stack: read the call stack length and scan backward |
| // while collecting frames in leaf-to-root order. build, the caller of this |
| // function, reverses RadixArray in place so that we can reconstruct a call |
| // stack as if we were deserializing an array in the usual way, that is, the |
| // call stack length followed by the frames in leaf-to-root order, except that |
| // we need to handle pointers to parents along the way. |
| // |
| // To quickly determine the location of the common prefix within RadixArray, |
| // Indexes caches the indexes of the previous call stack's frames within |
| // RadixArray. |
| LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack( |
| const llvm::SmallVector<FrameId> *CallStack, |
| const llvm::SmallVector<FrameId> *Prev, |
| const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes) { |
| // Compute the length of the common root prefix between Prev and CallStack. |
| uint32_t CommonLen = 0; |
| if (Prev) { |
| auto Pos = std::mismatch(Prev->rbegin(), Prev->rend(), CallStack->rbegin(), |
| CallStack->rend()); |
| CommonLen = std::distance(CallStack->rbegin(), Pos.second); |
| } |
| |
| // Drop the portion beyond CommonLen. |
| assert(CommonLen <= Indexes.size()); |
| Indexes.resize(CommonLen); |
| |
| // Append a pointer to the parent. |
| if (CommonLen) { |
| uint32_t CurrentIndex = RadixArray.size(); |
| uint32_t ParentIndex = Indexes.back(); |
| // The offset to the parent must be negative because we are pointing to an |
| // element we've already added to RadixArray. |
| assert(ParentIndex < CurrentIndex); |
| RadixArray.push_back(ParentIndex - CurrentIndex); |
| } |
| |
| // Copy the part of the call stack beyond the common prefix to RadixArray. |
| assert(CommonLen <= CallStack->size()); |
| for (FrameId F : llvm::drop_begin(llvm::reverse(*CallStack), CommonLen)) { |
| // Remember the index of F in RadixArray. |
| Indexes.push_back(RadixArray.size()); |
| RadixArray.push_back(MemProfFrameIndexes.find(F)->second); |
| } |
| assert(CallStack->size() == Indexes.size()); |
| |
| // End with the call stack length. |
| RadixArray.push_back(CallStack->size()); |
| |
| // Return the index within RadixArray where we can start reconstructing a |
| // given call stack from. |
| return RadixArray.size() - 1; |
| } |
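| |
| // Worked example (illustrative). Suppose MemProfFrameIndexes maps f1 -> 1, |
| // f2 -> 2, and f3 -> 3, and we encode two call stacks that share the root |
| // frame f1, stored leaf-to-root as {f2, f1} and {f3, f1}: |
| // |
| //   encodeCallStack({f2, f1}, /*Prev=*/nullptr, ...) |
| //     RadixArray = [1, 2, 2]; returns 2 |
| //     (frames f1, f2 in root-to-leaf order, then the length 2) |
| //   encodeCallStack({f3, f1}, /*Prev=*/{f2, f1}, ...) |
| //     RadixArray = [1, 2, 2, -3, 3, 2]; returns 5 |
| //     (offset -3 back to the shared f1, then f3, then the length 2) |
| // |
| // build below then reverses RadixArray in place and rebases the returned |
| // positions accordingly. |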
| |
| void CallStackRadixTreeBuilder::build( |
| llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> |
| &&MemProfCallStackData, |
| const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes, |
| llvm::DenseMap<FrameId, FrameStat> &FrameHistogram) { |
| // Take the vector portion of MemProfCallStackData. The vector is exactly |
| // what we need to sort. Also, we no longer need its lookup capability. |
| llvm::SmallVector<CSIdPair, 0> CallStacks = MemProfCallStackData.takeVector(); |
| |
| // Return early if we have no work to do. |
| if (CallStacks.empty()) { |
| RadixArray.clear(); |
| CallStackPos.clear(); |
| return; |
| } |
| |
| // Sorting the list of call stacks in the dictionary order is sufficient to |
| // maximize the length of the common prefix between two adjacent call stacks |
| // and thus minimize the length of RadixArray. However, we go one step |
| // further and try to reduce the number of times we follow pointers to parents |
|   // during deserialization. Consider a poorly encoded radix tree: |
| // |
| // CallStackId 1: f1 -> f2 -> f3 |
| // | |
| // CallStackId 2: +--- f4 -> f5 |
| // | |
| // CallStackId 3: +--> f6 |
| // |
| // Here, f2 and f4 appear once and twice, respectively, in the call stacks. |
| // Once we encode CallStackId 1 into RadixArray, every other call stack with |
| // common prefix f1 ends up pointing to CallStackId 1. Since CallStackId 3 |
|   // shares "f1 f4" with CallStackId 2, CallStackId 3 needs to follow pointers |
|   // to parents twice. |
| // |
| // We try to alleviate the situation by sorting the list of call stacks by |
| // comparing the popularity of frames rather than the integer values of |
| // FrameIds. In the example above, f4 is more popular than f2, so we sort the |
| // call stacks and encode them as: |
| // |
| // CallStackId 2: f1 -- f4 -> f5 |
| // | | |
| // CallStackId 3: | +--> f6 |
| // | |
| // CallStackId 1: +--> f2 -> f3 |
| // |
| // Notice that CallStackId 3 follows a pointer to a parent only once. |
| // |
|   // All this is a quick-and-dirty heuristic to reduce the number of jumps. The |
|   // proper way would be to compute the weight of each radix tree node, that |
|   // is, how many call stacks use a given node, and encode the radix tree |
|   // starting from the heaviest node. We do not do so because that is |
|   // substantially more work. |
| llvm::sort(CallStacks, [&](const CSIdPair &L, const CSIdPair &R) { |
| // Call stacks are stored from leaf to root. Perform comparisons from the |
| // root. |
| return std::lexicographical_compare( |
| L.second.rbegin(), L.second.rend(), R.second.rbegin(), R.second.rend(), |
| [&](FrameId F1, FrameId F2) { |
| uint64_t H1 = FrameHistogram[F1].Count; |
| uint64_t H2 = FrameHistogram[F2].Count; |
| // Popular frames should come later because we encode call stacks from |
| // the last one in the list. |
| if (H1 != H2) |
| return H1 < H2; |
| // For sort stability. |
| return F1 < F2; |
| }); |
| }); |
| |
| // Reserve some reasonable amount of storage. |
| RadixArray.clear(); |
| RadixArray.reserve(CallStacks.size() * 8); |
| |
| // Indexes will grow as long as the longest call stack. |
| Indexes.clear(); |
| Indexes.reserve(512); |
| |
| // CallStackPos will grow to exactly CallStacks.size() entries. |
| CallStackPos.clear(); |
| CallStackPos.reserve(CallStacks.size()); |
| |
| // Compute the radix array. We encode one call stack at a time, computing the |
| // longest prefix that's shared with the previous call stack we encode. For |
| // each call stack we encode, we remember a mapping from CallStackId to its |
| // position within RadixArray. |
| // |
| // As an optimization, we encode from the last call stack in CallStacks to |
| // reduce the number of times we follow pointers to the parents. Consider the |
| // list of call stacks that has been sorted in the dictionary order: |
| // |
| // Call Stack 1: F1 |
| // Call Stack 2: F1 -> F2 |
| // Call Stack 3: F1 -> F2 -> F3 |
| // |
| // If we traversed CallStacks in the forward order, we would end up with a |
| // radix tree like: |
| // |
| // Call Stack 1: F1 |
| // | |
| // Call Stack 2: +---> F2 |
| // | |
| // Call Stack 3: +---> F3 |
| // |
| // Notice that each call stack jumps to the previous one. However, if we |
| // traverse CallStacks in the reverse order, then Call Stack 3 has the |
|   // complete call stack encoded without any pointers. Call Stacks 1 and 2 |
|   // point to appropriate prefixes of Call Stack 3. |
| const llvm::SmallVector<FrameId> *Prev = nullptr; |
| for (const auto &[CSId, CallStack] : llvm::reverse(CallStacks)) { |
| LinearCallStackId Pos = |
| encodeCallStack(&CallStack, Prev, MemProfFrameIndexes); |
| CallStackPos.insert({CSId, Pos}); |
| Prev = &CallStack; |
| } |
| |
| // "RadixArray.size() - 1" below is problematic if RadixArray is empty. |
| assert(!RadixArray.empty()); |
| |
| // Reverse the radix array in place. We do so mostly for intuitive |
| // deserialization where we would read the length field and then the call |
| // stack frames proper just like any other array deserialization, except |
| // that we have occasional jumps to take advantage of prefixes. |
| for (size_t I = 0, J = RadixArray.size() - 1; I < J; ++I, --J) |
| std::swap(RadixArray[I], RadixArray[J]); |
| |
| // "Reverse" the indexes stored in CallStackPos. |
| for (auto &[K, V] : CallStackPos) |
| V = RadixArray.size() - 1 - V; |
| } |
| |
| llvm::DenseMap<FrameId, FrameStat> |
| computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> |
| &MemProfCallStackData) { |
| llvm::DenseMap<FrameId, FrameStat> Histogram; |
| |
| for (const auto &KV : MemProfCallStackData) { |
| const auto &CS = KV.second; |
| for (unsigned I = 0, E = CS.size(); I != E; ++I) { |
| auto &S = Histogram[CS[I]]; |
| ++S.Count; |
| S.PositionSum += I; |
| } |
| } |
| return Histogram; |
| } |
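| |
| // For example (illustrative), given the call stacks {f2, f1} and {f3, f1} |
| // stored leaf-to-root, the resulting histogram is: |
| // |
| //   f1: Count = 2, PositionSum = 1 + 1 = 2 |
| //   f2: Count = 1, PositionSum = 0 |
| //   f3: Count = 1, PositionSum = 0 |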
| |
| void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record) { |
| for (const auto &AS : Record.AllocSites) { |
| assert(AS.CSId == hashCallStack(AS.CallStack)); |
| (void)AS; |
| } |
| } |
| |
| void verifyFunctionProfileData( |
| const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> |
| &FunctionProfileData) { |
| for (const auto &[GUID, Record] : FunctionProfileData) { |
| (void)GUID; |
| verifyIndexedMemProfRecord(Record); |
| } |
| } |
| |
| } // namespace memprof |
| } // namespace llvm |