| //===- bolt/Rewrite/LinuxKernelRewriter.cpp -------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // Support for updating Linux Kernel metadata. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "bolt/Core/BinaryFunction.h" |
| #include "bolt/Rewrite/MetadataRewriter.h" |
| #include "bolt/Rewrite/MetadataRewriters.h" |
| #include "bolt/Utils/CommandLineOpts.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/DenseSet.h" |
| #include "llvm/MC/MCDisassembler/MCDisassembler.h" |
| #include "llvm/Support/BinaryStreamWriter.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/Errc.h" |
| #include "llvm/Support/ErrorOr.h" |
| #include <regex> |
| |
| #define DEBUG_TYPE "bolt-linux" |
| |
| using namespace llvm; |
| using namespace bolt; |
| |
| namespace opts { |
| |
| static cl::opt<bool> |
| AltInstHasPadLen("alt-inst-has-padlen", |
| cl::desc("specify that .altinstructions has padlen field"), |
| cl::init(false), cl::Hidden, cl::cat(BoltCategory)); |
| |
| static cl::opt<uint32_t> |
| AltInstFeatureSize("alt-inst-feature-size", |
| cl::desc("size of feature field in .altinstructions"), |
| cl::init(2), cl::Hidden, cl::cat(BoltCategory)); |
| |
| static cl::opt<bool> |
| DumpAltInstructions("dump-alt-instructions", |
| cl::desc("dump Linux alternative instructions info"), |
| cl::init(false), cl::Hidden, cl::cat(BoltCategory)); |
| |
| static cl::opt<bool> |
| DumpExceptions("dump-linux-exceptions", |
| cl::desc("dump Linux kernel exception table"), |
| cl::init(false), cl::Hidden, cl::cat(BoltCategory)); |
| |
| static cl::opt<bool> |
| DumpORC("dump-orc", cl::desc("dump raw ORC unwind information (sorted)"), |
| cl::init(false), cl::Hidden, cl::cat(BoltCategory)); |
| |
| static cl::opt<bool> DumpParavirtualPatchSites( |
| "dump-para-sites", cl::desc("dump Linux kernel paravitual patch sites"), |
| cl::init(false), cl::Hidden, cl::cat(BoltCategory)); |
| |
| static cl::opt<bool> |
| DumpPCIFixups("dump-pci-fixups", |
| cl::desc("dump Linux kernel PCI fixup table"), |
| cl::init(false), cl::Hidden, cl::cat(BoltCategory)); |
| |
| static cl::opt<bool> DumpSMPLocks("dump-smp-locks", |
| cl::desc("dump Linux kernel SMP locks"), |
| cl::init(false), cl::Hidden, |
| cl::cat(BoltCategory)); |
| |
| static cl::opt<bool> DumpStaticCalls("dump-static-calls", |
| cl::desc("dump Linux kernel static calls"), |
| cl::init(false), cl::Hidden, |
| cl::cat(BoltCategory)); |
| |
| static cl::opt<bool> |
| DumpStaticKeys("dump-static-keys", |
| cl::desc("dump Linux kernel static keys jump table"), |
| cl::init(false), cl::Hidden, cl::cat(BoltCategory)); |
| |
| static cl::opt<bool> LongJumpLabels( |
| "long-jump-labels", |
| cl::desc("always use long jumps/nops for Linux kernel static keys"), |
| cl::init(false), cl::Hidden, cl::cat(BoltCategory)); |
| |
| static cl::opt<bool> |
| PrintORC("print-orc", |
| cl::desc("print ORC unwind information for instructions"), |
| cl::init(true), cl::Hidden, cl::cat(BoltCategory)); |
| |
| } // namespace opts |
| |
/// Linux kernel version as parsed from the linux_banner string:
/// Major.Minor[.Rev], with a missing Rev treated as 0.
struct LKVersion {
  LKVersion() {}
  LKVersion(unsigned Major, unsigned Minor, unsigned Rev)
      : Major(Major), Minor(Minor), Rev(Rev) {}

  /// Lexicographic ordering on (Major, Minor, Rev).
  bool operator<(const LKVersion &Other) const {
    return std::tie(Major, Minor, Rev) <
           std::tie(Other.Major, Other.Minor, Other.Rev);
  }

  bool operator==(const LKVersion &Other) const {
    return std::tie(Major, Minor, Rev) ==
           std::tie(Other.Major, Other.Minor, Other.Rev);
  }

  // The remaining relational operators are derived from the two above.
  bool operator>(const LKVersion &Other) const { return Other < *this; }

  bool operator<=(const LKVersion &Other) const { return !(Other < *this); }

  bool operator>=(const LKVersion &Other) const { return !(*this < Other); }

  bool operator!=(const LKVersion &Other) const { return !(*this == Other); }

  unsigned Major{0};
  unsigned Minor{0};
  unsigned Rev{0};
};
| |
/// Linux Kernel supports stack unwinding using ORC (oops rewind capability).
/// ORC state at every IP can be described by the following data structure.
struct ORCState {
  int16_t SPOffset;
  int16_t BPOffset;
  int16_t Info;

  /// Two states are equal iff all three fields match.
  bool operator==(const ORCState &Other) const {
    if (SPOffset != Other.SPOffset)
      return false;
    if (BPOffset != Other.BPOffset)
      return false;
    return Info == Other.Info;
  }

  bool operator!=(const ORCState &Other) const { return !(*this == Other); }
};

/// Section terminator ORC entry.
static ORCState NullORC = {0, 0, 0};
| |
| /// Basic printer for ORC entry. It does not provide the same level of |
| /// information as objtool (for now). |
| inline raw_ostream &operator<<(raw_ostream &OS, const ORCState &E) { |
| if (!opts::PrintORC) |
| return OS; |
| if (E != NullORC) |
| OS << format("{sp: %d, bp: %d, info: 0x%x}", E.SPOffset, E.BPOffset, |
| E.Info); |
| else |
| OS << "{terminator}"; |
| |
| return OS; |
| } |
| |
| namespace { |
| |
| /// Extension to DataExtractor that supports reading addresses stored in |
| /// PC-relative format. |
| class AddressExtractor : public DataExtractor { |
| uint64_t DataAddress; |
| |
| public: |
| AddressExtractor(StringRef Data, uint64_t DataAddress, bool IsLittleEndian, |
| uint8_t AddressSize) |
| : DataExtractor(Data, IsLittleEndian, AddressSize), |
| DataAddress(DataAddress) {} |
| |
| /// Extract 32-bit PC-relative address/pointer. |
| uint64_t getPCRelAddress32(Cursor &C) { |
| const uint64_t Base = DataAddress + C.tell(); |
| return Base + (int32_t)getU32(C); |
| } |
| |
| /// Extract 64-bit PC-relative address/pointer. |
| uint64_t getPCRelAddress64(Cursor &C) { |
| const uint64_t Base = DataAddress + C.tell(); |
| return Base + (int64_t)getU64(C); |
| } |
| }; |
| |
| class LinuxKernelRewriter final : public MetadataRewriter { |
| LKVersion LinuxKernelVersion; |
| |
| /// Information required for updating metadata referencing an instruction. |
| struct InstructionFixup { |
| BinarySection &Section; // Section referencing the instruction. |
| uint64_t Offset; // Offset in the section above. |
| BinaryFunction &BF; // Function containing the instruction. |
| MCSymbol &Label; // Label marking the instruction. |
| bool IsPCRelative; // If the reference type is relative. |
| }; |
| std::vector<InstructionFixup> Fixups; |
| |
| /// Size of an entry in .smp_locks section. |
| static constexpr size_t SMP_LOCKS_ENTRY_SIZE = 4; |
| |
| /// Linux ORC sections. |
| ErrorOr<BinarySection &> ORCUnwindSection = std::errc::bad_address; |
| ErrorOr<BinarySection &> ORCUnwindIPSection = std::errc::bad_address; |
| |
| /// Size of entries in ORC sections. |
| static constexpr size_t ORC_UNWIND_ENTRY_SIZE = 6; |
| static constexpr size_t ORC_UNWIND_IP_ENTRY_SIZE = 4; |
| |
| struct ORCListEntry { |
| uint64_t IP; /// Instruction address. |
| BinaryFunction *BF; /// Binary function corresponding to the entry. |
| ORCState ORC; /// Stack unwind info in ORC format. |
| |
| /// ORC entries are sorted by their IPs. Terminator entries (NullORC) |
| /// should precede other entries with the same address. |
| bool operator<(const ORCListEntry &Other) const { |
| if (IP < Other.IP) |
| return 1; |
| if (IP > Other.IP) |
| return 0; |
| return ORC == NullORC && Other.ORC != NullORC; |
| } |
| }; |
| |
| using ORCListType = std::vector<ORCListEntry>; |
| ORCListType ORCEntries; |
| |
| /// Number of entries in the input file ORC sections. |
| uint64_t NumORCEntries = 0; |
| |
| /// Section containing static keys jump table. |
| ErrorOr<BinarySection &> StaticKeysJumpSection = std::errc::bad_address; |
| uint64_t StaticKeysJumpTableAddress = 0; |
| static constexpr size_t STATIC_KEYS_JUMP_ENTRY_SIZE = 8; |
| |
| struct JumpInfoEntry { |
| bool Likely; |
| bool InitValue; |
| }; |
| SmallVector<JumpInfoEntry, 16> JumpInfo; |
| |
| /// Static key entries that need nop conversion. |
| DenseSet<uint32_t> NopIDs; |
| |
| /// Section containing static call table. |
| ErrorOr<BinarySection &> StaticCallSection = std::errc::bad_address; |
| uint64_t StaticCallTableAddress = 0; |
| static constexpr size_t STATIC_CALL_ENTRY_SIZE = 8; |
| |
| struct StaticCallInfo { |
| uint32_t ID; /// Identifier of the entry in the table. |
| BinaryFunction *Function; /// Function containing associated call. |
| MCSymbol *Label; /// Label attached to the call. |
| }; |
| using StaticCallListType = std::vector<StaticCallInfo>; |
| StaticCallListType StaticCallEntries; |
| |
| /// Section containing the Linux exception table. |
| ErrorOr<BinarySection &> ExceptionsSection = std::errc::bad_address; |
| static constexpr size_t EXCEPTION_TABLE_ENTRY_SIZE = 12; |
| |
| /// Functions with exception handling code. |
| DenseSet<BinaryFunction *> FunctionsWithExceptions; |
| |
| /// Section with paravirtual patch sites. |
| ErrorOr<BinarySection &> ParavirtualPatchSection = std::errc::bad_address; |
| |
| /// Alignment of paravirtual patch structures. |
| static constexpr size_t PARA_PATCH_ALIGN = 8; |
| |
| /// .altinstructions section. |
| ErrorOr<BinarySection &> AltInstrSection = std::errc::bad_address; |
| |
| /// Section containing Linux bug table. |
| ErrorOr<BinarySection &> BugTableSection = std::errc::bad_address; |
| |
| /// Size of bug_entry struct. |
| static constexpr size_t BUG_TABLE_ENTRY_SIZE = 12; |
| |
| /// List of bug entries per function. |
| using FunctionBugListType = |
| DenseMap<BinaryFunction *, SmallVector<uint32_t, 2>>; |
| FunctionBugListType FunctionBugList; |
| |
| /// .pci_fixup section. |
| ErrorOr<BinarySection &> PCIFixupSection = std::errc::bad_address; |
| static constexpr size_t PCI_FIXUP_ENTRY_SIZE = 16; |
| |
| Error detectLinuxKernelVersion(); |
| |
| /// Process linux kernel special sections and their relocations. |
| void processLKSections(); |
| |
| /// Process __ksymtab and __ksymtab_gpl. |
| void processLKKSymtab(bool IsGPL = false); |
| |
| // Create relocations in sections requiring fixups. |
| // |
| // Make sure functions that will not be emitted are marked as such before this |
| // function is executed. |
| void processInstructionFixups(); |
| |
| /// Process .smp_locks section. |
| Error processSMPLocks(); |
| |
| /// Read ORC unwind information and annotate instructions. |
| Error readORCTables(); |
| |
| /// Update ORC for functions once CFG is constructed. |
| Error processORCPostCFG(); |
| |
| /// Update ORC data in the binary. |
| Error rewriteORCTables(); |
| |
| /// Validate written ORC tables after binary emission. |
| Error validateORCTables(); |
| |
| /// Static call table handling. |
| Error readStaticCalls(); |
| Error rewriteStaticCalls(); |
| |
| Error readExceptionTable(); |
| Error rewriteExceptionTable(); |
| |
| /// Paravirtual instruction patch sites. |
| Error readParaInstructions(); |
| Error rewriteParaInstructions(); |
| |
| /// __bug_table section handling. |
| Error readBugTable(); |
| Error rewriteBugTable(); |
| |
| /// Do no process functions containing instruction annotated with |
| /// \p Annotation. |
| void skipFunctionsWithAnnotation(StringRef Annotation) const; |
| |
| /// Handle alternative instruction info from .altinstructions. |
| Error readAltInstructions(); |
| void processAltInstructionsPostCFG(); |
| Error tryReadAltInstructions(uint32_t AltInstFeatureSize, |
| bool AltInstHasPadLen, bool ParseOnly); |
| |
| /// Read .pci_fixup |
| Error readPCIFixupTable(); |
| |
| /// Handle static keys jump table. |
| Error readStaticKeysJumpTable(); |
| Error rewriteStaticKeysJumpTable(); |
| Error updateStaticKeysJumpTablePostEmit(); |
| |
| public: |
| LinuxKernelRewriter(BinaryContext &BC) |
| : MetadataRewriter("linux-kernel-rewriter", BC) {} |
| |
| Error preCFGInitializer() override { |
| if (Error E = detectLinuxKernelVersion()) |
| return E; |
| |
| processLKSections(); |
| |
| if (Error E = processSMPLocks()) |
| return E; |
| |
| if (Error E = readStaticCalls()) |
| return E; |
| |
| if (Error E = readExceptionTable()) |
| return E; |
| |
| if (Error E = readParaInstructions()) |
| return E; |
| |
| if (Error E = readBugTable()) |
| return E; |
| |
| if (Error E = readAltInstructions()) |
| return E; |
| |
| // Some ORC entries could be linked to alternative instruction |
| // sequences. Hence, we read ORC after .altinstructions. |
| if (Error E = readORCTables()) |
| return E; |
| |
| if (Error E = readPCIFixupTable()) |
| return E; |
| |
| if (Error E = readStaticKeysJumpTable()) |
| return E; |
| |
| return Error::success(); |
| } |
| |
| Error postCFGInitializer() override { |
| if (Error E = processORCPostCFG()) |
| return E; |
| |
| processAltInstructionsPostCFG(); |
| |
| return Error::success(); |
| } |
| |
| Error preEmitFinalizer() override { |
| // Since rewriteExceptionTable() can mark functions as non-simple, run it |
| // before other rewriters that depend on simple/emit status. |
| if (Error E = rewriteExceptionTable()) |
| return E; |
| |
| if (Error E = rewriteParaInstructions()) |
| return E; |
| |
| if (Error E = rewriteORCTables()) |
| return E; |
| |
| if (Error E = rewriteStaticCalls()) |
| return E; |
| |
| if (Error E = rewriteStaticKeysJumpTable()) |
| return E; |
| |
| if (Error E = rewriteBugTable()) |
| return E; |
| |
| processInstructionFixups(); |
| |
| return Error::success(); |
| } |
| |
| Error postEmitFinalizer() override { |
| if (Error E = updateStaticKeysJumpTablePostEmit()) |
| return E; |
| |
| if (Error E = validateORCTables()) |
| return E; |
| |
| return Error::success(); |
| } |
| }; |
| |
| Error LinuxKernelRewriter::detectLinuxKernelVersion() { |
| if (BinaryData *BD = BC.getBinaryDataByName("linux_banner")) { |
| const BinarySection &Section = BD->getSection(); |
| const std::string S = |
| Section.getContents().substr(BD->getOffset(), BD->getSize()).str(); |
| |
| const std::regex Re(R"---(Linux version ((\d+)\.(\d+)(\.(\d+))?))---"); |
| std::smatch Match; |
| if (std::regex_search(S, Match, Re)) { |
| const unsigned Major = std::stoi(Match[2].str()); |
| const unsigned Minor = std::stoi(Match[3].str()); |
| const unsigned Rev = Match[5].matched ? std::stoi(Match[5].str()) : 0; |
| LinuxKernelVersion = LKVersion(Major, Minor, Rev); |
| BC.outs() << "BOLT-INFO: Linux kernel version is " << Match[1].str() |
| << "\n"; |
| return Error::success(); |
| } |
| } |
| return createStringError(errc::executable_format_error, |
| "Linux kernel version is unknown"); |
| } |
| |
| void LinuxKernelRewriter::processLKSections() { |
| processLKKSymtab(); |
| processLKKSymtab(true); |
| } |
| |
| /// Process __ksymtab[_gpl] sections of Linux Kernel. |
| /// This section lists all the vmlinux symbols that kernel modules can access. |
| /// |
| /// All the entries are 4 bytes each and hence we can read them by one by one |
| /// and ignore the ones that are not pointing to the .text section. All pointers |
| /// are PC relative offsets. Always, points to the beginning of the function. |
| void LinuxKernelRewriter::processLKKSymtab(bool IsGPL) { |
| StringRef SectionName = "__ksymtab"; |
| if (IsGPL) |
| SectionName = "__ksymtab_gpl"; |
| ErrorOr<BinarySection &> SectionOrError = |
| BC.getUniqueSectionByName(SectionName); |
| assert(SectionOrError && |
| "__ksymtab[_gpl] section not found in Linux Kernel binary"); |
| const uint64_t SectionSize = SectionOrError->getSize(); |
| const uint64_t SectionAddress = SectionOrError->getAddress(); |
| assert((SectionSize % 4) == 0 && |
| "The size of the __ksymtab[_gpl] section should be a multiple of 4"); |
| |
| for (uint64_t I = 0; I < SectionSize; I += 4) { |
| const uint64_t EntryAddress = SectionAddress + I; |
| ErrorOr<int64_t> Offset = BC.getSignedValueAtAddress(EntryAddress, 4); |
| assert(Offset && "Reading valid PC-relative offset for a ksymtab entry"); |
| const int32_t SignedOffset = *Offset; |
| const uint64_t RefAddress = EntryAddress + SignedOffset; |
| BinaryFunction *BF = BC.getBinaryFunctionAtAddress(RefAddress); |
| if (!BF) |
| continue; |
| |
| BC.addRelocation(EntryAddress, BF->getSymbol(), Relocation::getPC32(), 0, |
| *Offset); |
| } |
| } |
| |
| /// .smp_locks section contains PC-relative references to instructions with LOCK |
| /// prefix. The prefix can be converted to NOP at boot time on non-SMP systems. |
| Error LinuxKernelRewriter::processSMPLocks() { |
| ErrorOr<BinarySection &> SMPLocksSection = |
| BC.getUniqueSectionByName(".smp_locks"); |
| if (!SMPLocksSection) |
| return Error::success(); |
| |
| const uint64_t SectionSize = SMPLocksSection->getSize(); |
| const uint64_t SectionAddress = SMPLocksSection->getAddress(); |
| if (SectionSize % SMP_LOCKS_ENTRY_SIZE) |
| return createStringError(errc::executable_format_error, |
| "bad size of .smp_locks section"); |
| |
| AddressExtractor AE(SMPLocksSection->getContents(), SectionAddress, |
| BC.AsmInfo->isLittleEndian(), |
| BC.AsmInfo->getCodePointerSize()); |
| AddressExtractor::Cursor Cursor(0); |
| while (Cursor && Cursor.tell() < SectionSize) { |
| const uint64_t Offset = Cursor.tell(); |
| const uint64_t IP = AE.getPCRelAddress32(Cursor); |
| |
| // Consume the status of the cursor. |
| if (!Cursor) |
| return createStringError(errc::executable_format_error, |
| "error while reading .smp_locks: %s", |
| toString(Cursor.takeError()).c_str()); |
| |
| if (opts::DumpSMPLocks) |
| BC.outs() << "SMP lock at 0x: " << Twine::utohexstr(IP) << '\n'; |
| |
| BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(IP); |
| if (!BF || !BC.shouldEmit(*BF)) |
| continue; |
| |
| MCInst *Inst = BF->getInstructionAtOffset(IP - BF->getAddress()); |
| if (!Inst) |
| return createStringError(errc::executable_format_error, |
| "no instruction matches lock at 0x%" PRIx64, IP); |
| |
| // Check for duplicate entries. |
| if (BC.MIB->hasAnnotation(*Inst, "SMPLock")) |
| return createStringError(errc::executable_format_error, |
| "duplicate SMP lock at 0x%" PRIx64, IP); |
| |
| BC.MIB->addAnnotation(*Inst, "SMPLock", true); |
| MCSymbol *Label = |
| BC.MIB->getOrCreateInstLabel(*Inst, "__SMPLock_", BC.Ctx.get()); |
| |
| Fixups.push_back({*SMPLocksSection, Offset, *BF, *Label, |
| /*IsPCRelative*/ true}); |
| } |
| |
| const uint64_t NumEntries = SectionSize / SMP_LOCKS_ENTRY_SIZE; |
| BC.outs() << "BOLT-INFO: parsed " << NumEntries << " SMP lock entries\n"; |
| |
| return Error::success(); |
| } |
| |
| void LinuxKernelRewriter::processInstructionFixups() { |
| for (InstructionFixup &Fixup : Fixups) { |
| if (!BC.shouldEmit(Fixup.BF)) |
| continue; |
| |
| Fixup.Section.addRelocation(Fixup.Offset, &Fixup.Label, |
| Fixup.IsPCRelative ? ELF::R_X86_64_PC32 |
| : ELF::R_X86_64_64, |
| /*Addend*/ 0); |
| } |
| } |
| |
| Error LinuxKernelRewriter::readORCTables() { |
| // NOTE: we should ignore relocations for orc tables as the tables are sorted |
| // post-link time and relocations are not updated. |
| ORCUnwindSection = BC.getUniqueSectionByName(".orc_unwind"); |
| ORCUnwindIPSection = BC.getUniqueSectionByName(".orc_unwind_ip"); |
| |
| if (!ORCUnwindSection && !ORCUnwindIPSection) |
| return Error::success(); |
| |
| if (!ORCUnwindSection || !ORCUnwindIPSection) |
| return createStringError(errc::executable_format_error, |
| "missing ORC section"); |
| |
| NumORCEntries = ORCUnwindIPSection->getSize() / ORC_UNWIND_IP_ENTRY_SIZE; |
| if (ORCUnwindSection->getSize() != NumORCEntries * ORC_UNWIND_ENTRY_SIZE || |
| ORCUnwindIPSection->getSize() != NumORCEntries * ORC_UNWIND_IP_ENTRY_SIZE) |
| return createStringError(errc::executable_format_error, |
| "ORC entries number mismatch detected"); |
| |
| DataExtractor OrcDE(ORCUnwindSection->getContents(), |
| BC.AsmInfo->isLittleEndian(), |
| BC.AsmInfo->getCodePointerSize()); |
| AddressExtractor IPAE( |
| ORCUnwindIPSection->getContents(), ORCUnwindIPSection->getAddress(), |
| BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize()); |
| DataExtractor::Cursor ORCCursor(0); |
| DataExtractor::Cursor IPCursor(0); |
| uint64_t PrevIP = 0; |
| for (uint32_t Index = 0; Index < NumORCEntries; ++Index) { |
| const uint64_t IP = IPAE.getPCRelAddress32(IPCursor); |
| // Consume the status of the cursor. |
| if (!IPCursor) |
| return createStringError(errc::executable_format_error, |
| "out of bounds while reading ORC IP table: %s", |
| toString(IPCursor.takeError()).c_str()); |
| |
| if (IP < PrevIP && opts::Verbosity) |
| BC.errs() << "BOLT-WARNING: out of order IP 0x" << Twine::utohexstr(IP) |
| << " detected while reading ORC\n"; |
| |
| PrevIP = IP; |
| |
| // Store all entries, includes those we are not going to update as the |
| // tables need to be sorted globally before being written out. |
| ORCEntries.push_back(ORCListEntry()); |
| ORCListEntry &Entry = ORCEntries.back(); |
| |
| Entry.IP = IP; |
| Entry.ORC.SPOffset = (int16_t)OrcDE.getU16(ORCCursor); |
| Entry.ORC.BPOffset = (int16_t)OrcDE.getU16(ORCCursor); |
| Entry.ORC.Info = (int16_t)OrcDE.getU16(ORCCursor); |
| Entry.BF = nullptr; |
| |
| // Consume the status of the cursor. |
| if (!ORCCursor) |
| return createStringError(errc::executable_format_error, |
| "out of bounds while reading ORC: %s", |
| toString(ORCCursor.takeError()).c_str()); |
| |
| if (Entry.ORC == NullORC) |
| continue; |
| |
| BinaryFunction *&BF = Entry.BF; |
| BF = BC.getBinaryFunctionContainingAddress(IP, /*CheckPastEnd*/ true); |
| |
| // If the entry immediately pointing past the end of the function is not |
| // the terminator entry, then it does not belong to this function. |
| if (BF && BF->getAddress() + BF->getSize() == IP) |
| BF = 0; |
| |
| if (!BF) { |
| if (opts::Verbosity) |
| BC.errs() << "BOLT-WARNING: no binary function found matching ORC 0x" |
| << Twine::utohexstr(IP) << ": " << Entry.ORC << '\n'; |
| continue; |
| } |
| |
| BF->setHasORC(true); |
| |
| if (!BF->hasInstructions()) |
| continue; |
| |
| const uint64_t Offset = IP - BF->getAddress(); |
| MCInst *Inst = BF->getInstructionAtOffset(Offset); |
| if (!Inst) { |
| // Check if there is an alternative instruction(s) at this IP. Multiple |
| // alternative instructions can take a place of a single original |
| // instruction and each alternative can have a separate ORC entry. |
| // Since ORC table is shared between all alternative sequences, there's |
| // a requirement that only one (out of many) sequences can have an |
| // instruction from the ORC table to avoid ambiguities/conflicts. |
| // |
| // For now, we have limited support for alternatives. I.e. we still print |
| // functions with them, but will not change the code in the output binary. |
| // As such, we can ignore alternative ORC entries. They will be preserved |
| // in the binary, but will not get printed in the instruction stream. |
| Inst = BF->getInstructionContainingOffset(Offset); |
| if (Inst || BC.MIB->hasAnnotation(*Inst, "AltInst")) |
| continue; |
| |
| return createStringError( |
| errc::executable_format_error, |
| "no instruction at address 0x%" PRIx64 " in .orc_unwind_ip", IP); |
| } |
| |
| // Some addresses will have two entries associated with them. The first |
| // one being a "weak" section terminator. Since we ignore the terminator, |
| // we should only assign one entry per instruction. |
| if (BC.MIB->hasAnnotation(*Inst, "ORC")) |
| return createStringError( |
| errc::executable_format_error, |
| "duplicate non-terminal ORC IP 0x%" PRIx64 " in .orc_unwind_ip", IP); |
| |
| BC.MIB->addAnnotation(*Inst, "ORC", Entry.ORC); |
| } |
| |
| BC.outs() << "BOLT-INFO: parsed " << NumORCEntries << " ORC entries\n"; |
| |
| if (opts::DumpORC) { |
| BC.outs() << "BOLT-INFO: ORC unwind information:\n"; |
| for (const ORCListEntry &E : ORCEntries) { |
| BC.outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC; |
| if (E.BF) |
| BC.outs() << ": " << *E.BF; |
| BC.outs() << '\n'; |
| } |
| } |
| |
| // Add entries for functions that don't have explicit ORC info at the start. |
| // We'll have the correct info for them even if ORC for the preceding function |
| // changes. |
| ORCListType NewEntries; |
| for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { |
| auto It = llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) { |
| return E.IP <= BF.getAddress(); |
| }); |
| if (It != ORCEntries.begin()) |
| --It; |
| |
| if (It->BF == &BF) |
| continue; |
| |
| if (It->ORC == NullORC && It->IP == BF.getAddress()) { |
| assert(!It->BF); |
| It->BF = &BF; |
| continue; |
| } |
| |
| NewEntries.push_back({BF.getAddress(), &BF, It->ORC}); |
| if (It->ORC != NullORC) |
| BF.setHasORC(true); |
| } |
| |
| llvm::copy(NewEntries, std::back_inserter(ORCEntries)); |
| llvm::sort(ORCEntries); |
| |
| if (opts::DumpORC) { |
| BC.outs() << "BOLT-INFO: amended ORC unwind information:\n"; |
| for (const ORCListEntry &E : ORCEntries) { |
| BC.outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC; |
| if (E.BF) |
| BC.outs() << ": " << *E.BF; |
| BC.outs() << '\n'; |
| } |
| } |
| |
| return Error::success(); |
| } |
| |
Error LinuxKernelRewriter::processORCPostCFG() {
  // Nothing to do if the input binary carried no ORC metadata.
  if (!NumORCEntries)
    return Error::success();

  // Propagate ORC to the rest of the function. We can annotate every
  // instruction in every function, but to minimize the overhead, we annotate
  // the first instruction in every basic block to reflect the state at the
  // entry. This way, the ORC state can be calculated based on annotations
  // regardless of the basic block layout. Note that if we insert/delete
  // instructions, we must take care to attach ORC info to the new/deleted ones.
  for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {

    // ORC state in effect at the current instruction; unset until the first
    // annotated instruction (or the function-entry entry) is seen.
    std::optional<ORCState> CurrentState;
    for (BinaryBasicBlock &BB : BF) {
      for (MCInst &Inst : BB) {
        // Instructions that already carry an "ORC" annotation (attached in
        // readORCTables()) establish a new state going forward.
        ErrorOr<ORCState> State =
            BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC");

        if (State) {
          CurrentState = *State;
          continue;
        }

        // Get state for the start of the function.
        if (!CurrentState) {
          // A terminator entry (NullORC) can match the function address. If
          // there's also a non-terminator entry, it will be placed after the
          // terminator. Hence, we are looking for the last ORC entry that
          // matches the address.
          auto It =
              llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) {
                return E.IP <= BF.getAddress();
              });
          if (It != ORCEntries.begin())
            --It;

          assert(It->IP == BF.getAddress() && (!It->BF || It->BF == &BF) &&
                 "ORC info at function entry expected.");

          if (It->ORC == NullORC && BF.hasORC()) {
            BC.errs() << "BOLT-WARNING: ORC unwind info excludes prologue for "
                      << BF << '\n';
          }

          // Claim the entry for this function so rewriteORCTables() treats it
          // as belonging to BF.
          It->BF = &BF;

          CurrentState = It->ORC;
          if (It->ORC != NullORC)
            BF.setHasORC(true);
        }

        // While printing ORC, attach info to every instruction for convenience.
        if (opts::PrintORC || &Inst == &BB.front())
          BC.MIB->addAnnotation(Inst, "ORC", *CurrentState);
      }
    }
  }

  return Error::success();
}
| |
Error LinuxKernelRewriter::rewriteORCTables() {
  // Nothing to rewrite if the input had no ORC sections.
  if (!NumORCEntries)
    return Error::success();

  // Update ORC sections in-place. As we change the code, the number of ORC
  // entries may increase for some functions. However, as we remove terminator
  // redundancy (see below), more space is freed up and we should always be able
  // to fit new ORC tables in the reserved space.
  //
  // NOTE(review): the buffer allocated here appears to be handed over to the
  // section via updateContents() — confirm ownership semantics.
  auto createInPlaceWriter = [&](BinarySection &Section) -> BinaryStreamWriter {
    const size_t Size = Section.getSize();
    uint8_t *NewContents = new uint8_t[Size];
    Section.updateContents(NewContents, Size);
    Section.setOutputFileOffset(Section.getInputFileOffset());
    return BinaryStreamWriter({NewContents, Size}, BC.AsmInfo->isLittleEndian()
                                                       ? endianness::little
                                                       : endianness::big);
  };
  BinaryStreamWriter UnwindWriter = createInPlaceWriter(*ORCUnwindSection);
  BinaryStreamWriter UnwindIPWriter = createInPlaceWriter(*ORCUnwindIPSection);

  // Write one (IP, ORC) pair into the two parallel sections. Consecutive
  // duplicate states are elided unless \p Force is set; \p Label, when
  // provided, makes the IP slot relocatable against the emitted instruction.
  uint64_t NumEmitted = 0;
  std::optional<ORCState> LastEmittedORC;
  auto emitORCEntry = [&](const uint64_t IP, const ORCState &ORC,
                          MCSymbol *Label = 0, bool Force = false) -> Error {
    if (LastEmittedORC && ORC == *LastEmittedORC && !Force)
      return Error::success();

    LastEmittedORC = ORC;

    // We rewrite in place, so we can never emit more entries than the input
    // tables had room for.
    if (++NumEmitted > NumORCEntries)
      return createStringError(errc::executable_format_error,
                               "exceeded the number of allocated ORC entries");

    if (Label)
      ORCUnwindIPSection->addRelocation(UnwindIPWriter.getOffset(), Label,
                                        Relocation::getPC32(), /*Addend*/ 0);

    // IP slots store the target as a PC-relative 32-bit displacement.
    const int32_t IPValue =
        IP - ORCUnwindIPSection->getAddress() - UnwindIPWriter.getOffset();
    if (Error E = UnwindIPWriter.writeInteger(IPValue))
      return E;

    if (Error E = UnwindWriter.writeInteger(ORC.SPOffset))
      return E;
    if (Error E = UnwindWriter.writeInteger(ORC.BPOffset))
      return E;
    if (Error E = UnwindWriter.writeInteger(ORC.Info))
      return E;

    return Error::success();
  };

  // Emit new ORC entries for the emitted function.
  auto emitORC = [&](const FunctionFragment &FF) -> Error {
    ORCState CurrentState = NullORC;
    for (BinaryBasicBlock *BB : FF) {
      for (MCInst &Inst : *BB) {
        ErrorOr<ORCState> ErrorOrState =
            BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC");
        if (!ErrorOrState || *ErrorOrState == CurrentState)
          continue;

        // Issue label for the instruction.
        MCSymbol *Label =
            BC.MIB->getOrCreateInstLabel(Inst, "__ORC_", BC.Ctx.get());

        if (Error E = emitORCEntry(0, *ErrorOrState, Label))
          return E;

        CurrentState = *ErrorOrState;
      }
    }

    return Error::success();
  };

  // Emit ORC entries for cold fragments. We assume that these fragments are
  // emitted contiguously in memory using reserved space in the kernel. This
  // assumption is validated in post-emit pass validateORCTables() where we
  // check that ORC entries are sorted by their addresses.
  auto emitColdORC = [&]() -> Error {
    for (BinaryFunction &BF :
         llvm::make_second_range(BC.getBinaryFunctions())) {
      if (!BC.shouldEmit(BF))
        continue;
      for (FunctionFragment &FF : BF.getLayout().getSplitFragments())
        if (Error E = emitORC(FF))
          return E;
    }

    return Error::success();
  };

  // Walk input entries in address order, interleaving cold-fragment entries
  // once the reserved (BOLT) region is reached.
  bool ShouldEmitCold = !BC.BOLTReserved.empty();
  for (ORCListEntry &Entry : ORCEntries) {
    if (ShouldEmitCold && Entry.IP > BC.BOLTReserved.start()) {
      if (Error E = emitColdORC())
        return E;

      // Emit terminator entry at the end of the reserved region.
      if (Error E = emitORCEntry(BC.BOLTReserved.end(), NullORC))
        return E;

      ShouldEmitCold = false;
    }

    // Emit original entries for functions that we haven't modified.
    if (!Entry.BF || !BC.shouldEmit(*Entry.BF)) {
      // Emit terminator only if it marks the start of a function.
      if (Entry.ORC == NullORC && !Entry.BF)
        continue;
      if (Error E = emitORCEntry(Entry.IP, Entry.ORC))
        return E;
      continue;
    }

    // Emit all ORC entries for a function referenced by an entry and skip over
    // the rest of entries for this function by resetting its ORC attribute.
    if (Entry.BF->hasORC()) {
      if (Error E = emitORC(Entry.BF->getLayout().getMainFragment()))
        return E;
      Entry.BF->setHasORC(false);
    }
  }

  LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted " << NumEmitted
                    << " ORC entries\n");

  // Populate ORC tables with a terminator entry with max address to match the
  // original table sizes.
  const uint64_t LastIP = std::numeric_limits<uint64_t>::max();
  while (UnwindWriter.bytesRemaining()) {
    if (Error E = emitORCEntry(LastIP, NullORC, nullptr, /*Force*/ true))
      return E;
  }

  return Error::success();
}
| |
| Error LinuxKernelRewriter::validateORCTables() { |
| if (!ORCUnwindIPSection) |
| return Error::success(); |
| |
| AddressExtractor IPAE( |
| ORCUnwindIPSection->getOutputContents(), ORCUnwindIPSection->getAddress(), |
| BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize()); |
| AddressExtractor::Cursor IPCursor(0); |
| uint64_t PrevIP = 0; |
| for (uint32_t Index = 0; Index < NumORCEntries; ++Index) { |
| const uint64_t IP = IPAE.getPCRelAddress32(IPCursor); |
| if (!IPCursor) |
| return createStringError(errc::executable_format_error, |
| "out of bounds while reading ORC IP table: %s", |
| toString(IPCursor.takeError()).c_str()); |
| |
| assert(IP >= PrevIP && "Unsorted ORC table detected"); |
| (void)PrevIP; |
| PrevIP = IP; |
| } |
| |
| return Error::success(); |
| } |
| |
| /// The static call site table is created by objtool and contains entries in the |
| /// following format: |
| /// |
| /// struct static_call_site { |
| /// s32 addr; |
| /// s32 key; |
| /// }; |
| /// |
Error LinuxKernelRewriter::readStaticCalls() {
  // The table is delimited by __start/__stop_static_call_sites symbols. If
  // the start symbol is absent, there is no table to process.
  const BinaryData *StaticCallTable =
      BC.getBinaryDataByName("__start_static_call_sites");
  if (!StaticCallTable)
    return Error::success();

  StaticCallTableAddress = StaticCallTable->getAddress();

  const BinaryData *Stop = BC.getBinaryDataByName("__stop_static_call_sites");
  if (!Stop)
    return createStringError(errc::executable_format_error,
                             "missing __stop_static_call_sites symbol");

  ErrorOr<BinarySection &> ErrorOrSection =
      BC.getSectionForAddress(StaticCallTableAddress);
  if (!ErrorOrSection)
    return createStringError(errc::executable_format_error,
                             "no section matching __start_static_call_sites");

  StaticCallSection = *ErrorOrSection;
  // Sanity check: both delimiting symbols must belong to the same section.
  if (!StaticCallSection->containsAddress(Stop->getAddress() - 1))
    return createStringError(errc::executable_format_error,
                             "__stop_static_call_sites not in the same section "
                             "as __start_static_call_sites");

  // The table must contain a whole number of static_call_site entries.
  if ((Stop->getAddress() - StaticCallTableAddress) % STATIC_CALL_ENTRY_SIZE)
    return createStringError(errc::executable_format_error,
                             "static call table size error");

  const uint64_t SectionAddress = StaticCallSection->getAddress();
  AddressExtractor AE(StaticCallSection->getContents(), SectionAddress,
                      BC.AsmInfo->isLittleEndian(),
                      BC.AsmInfo->getCodePointerSize());
  AddressExtractor::Cursor Cursor(StaticCallTableAddress - SectionAddress);
  uint32_t EntryID = 0;
  while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) {
    // Both fields are 32-bit PC-relative offsets (struct static_call_site).
    const uint64_t CallAddress = AE.getPCRelAddress32(Cursor);
    const uint64_t KeyAddress = AE.getPCRelAddress32(Cursor);

    // Consume the status of the cursor.
    if (!Cursor)
      return createStringError(errc::executable_format_error,
                               "out of bounds while reading static calls: %s",
                               toString(Cursor.takeError()).c_str());

    ++EntryID;

    if (opts::DumpStaticCalls) {
      BC.outs() << "Static Call Site: " << EntryID << '\n';
      BC.outs() << "\tCallAddress: 0x" << Twine::utohexstr(CallAddress)
                << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress)
                << '\n';
    }

    // Entries whose call site falls outside of functions BOLT will emit can
    // be left untouched in the output.
    BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(CallAddress);
    if (!BF)
      continue;

    if (!BC.shouldEmit(*BF))
      continue;

    if (!BF->hasInstructions())
      continue;

    MCInst *Inst = BF->getInstructionAtOffset(CallAddress - BF->getAddress());
    if (!Inst)
      return createStringError(errc::executable_format_error,
                               "no instruction at call site address 0x%" PRIx64,
                               CallAddress);

    // Check for duplicate entries.
    if (BC.MIB->hasAnnotation(*Inst, "StaticCall"))
      return createStringError(errc::executable_format_error,
                               "duplicate static call site at 0x%" PRIx64,
                               CallAddress);

    BC.MIB->addAnnotation(*Inst, "StaticCall", EntryID);

    // The label lets rewriteStaticCalls() emit a relocation that updates the
    // table entry once the final instruction address is known.
    MCSymbol *Label =
        BC.MIB->getOrCreateInstLabel(*Inst, "__SC_", BC.Ctx.get());

    StaticCallEntries.push_back({EntryID, BF, Label});
  }

  BC.outs() << "BOLT-INFO: parsed " << StaticCallEntries.size()
            << " static call entries\n";

  return Error::success();
}
| |
| /// The static call table is sorted during boot time in |
| /// static_call_sort_entries(). This makes it possible to update existing |
| /// entries in-place ignoring their relative order. |
| Error LinuxKernelRewriter::rewriteStaticCalls() { |
| if (!StaticCallTableAddress || !StaticCallSection) |
| return Error::success(); |
| |
| for (auto &Entry : StaticCallEntries) { |
| if (!Entry.Function) |
| continue; |
| |
| BinaryFunction &BF = *Entry.Function; |
| if (!BC.shouldEmit(BF)) |
| continue; |
| |
| // Create a relocation against the label. |
| const uint64_t EntryOffset = StaticCallTableAddress - |
| StaticCallSection->getAddress() + |
| (Entry.ID - 1) * STATIC_CALL_ENTRY_SIZE; |
| StaticCallSection->addRelocation(EntryOffset, Entry.Label, |
| ELF::R_X86_64_PC32, /*Addend*/ 0); |
| } |
| |
| return Error::success(); |
| } |
| |
| /// Instructions that access user-space memory can cause page faults. These |
| /// faults will be handled by the kernel and execution will resume at the fixup |
| /// code location if the address was invalid. The kernel uses the exception |
| /// table to match the faulting instruction to its fixup. The table consists of |
| /// the following entries: |
| /// |
| /// struct exception_table_entry { |
| /// int insn; |
| /// int fixup; |
| /// int data; |
| /// }; |
| /// |
| /// More info at: |
| /// https://www.kernel.org/doc/Documentation/x86/exception-tables.txt |
Error LinuxKernelRewriter::readExceptionTable() {
  ExceptionsSection = BC.getUniqueSectionByName("__ex_table");
  if (!ExceptionsSection)
    return Error::success();

  // The section must contain a whole number of exception_table_entry records.
  if (ExceptionsSection->getSize() % EXCEPTION_TABLE_ENTRY_SIZE)
    return createStringError(errc::executable_format_error,
                             "exception table size error");

  AddressExtractor AE(
      ExceptionsSection->getContents(), ExceptionsSection->getAddress(),
      BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
  AddressExtractor::Cursor Cursor(0);
  uint32_t EntryID = 0;
  while (Cursor && Cursor.tell() < ExceptionsSection->getSize()) {
    // "insn" and "fixup" are 32-bit PC-relative addresses; "data" is raw.
    const uint64_t InstAddress = AE.getPCRelAddress32(Cursor);
    const uint64_t FixupAddress = AE.getPCRelAddress32(Cursor);
    const uint64_t Data = AE.getU32(Cursor);

    // Consume the status of the cursor.
    if (!Cursor)
      return createStringError(
          errc::executable_format_error,
          "out of bounds while reading exception table: %s",
          toString(Cursor.takeError()).c_str());

    ++EntryID;

    if (opts::DumpExceptions) {
      BC.outs() << "Exception Entry: " << EntryID << '\n';
      BC.outs() << "\tInsn: 0x" << Twine::utohexstr(InstAddress) << '\n'
                << "\tFixup: 0x" << Twine::utohexstr(FixupAddress) << '\n'
                << "\tData: 0x" << Twine::utohexstr(Data) << '\n';
    }

    MCInst *Inst = nullptr;
    MCSymbol *FixupLabel = nullptr;

    // Annotate the faulting instruction if it belongs to a function that BOLT
    // will emit, and remember the function for rewriteExceptionTable().
    BinaryFunction *InstBF = BC.getBinaryFunctionContainingAddress(InstAddress);
    if (InstBF && BC.shouldEmit(*InstBF)) {
      Inst = InstBF->getInstructionAtOffset(InstAddress - InstBF->getAddress());
      if (!Inst)
        return createStringError(errc::executable_format_error,
                                 "no instruction at address 0x%" PRIx64
                                 " in exception table",
                                 InstAddress);
      BC.MIB->addAnnotation(*Inst, "ExceptionEntry", EntryID);
      FunctionsWithExceptions.insert(InstBF);
    }

    if (!InstBF && opts::Verbosity) {
      BC.outs() << "BOLT-INFO: no function matches instruction at 0x"
                << Twine::utohexstr(InstAddress)
                << " referenced by Linux exception table\n";
    }

    BinaryFunction *FixupBF =
        BC.getBinaryFunctionContainingAddress(FixupAddress);
    if (FixupBF && BC.shouldEmit(*FixupBF)) {
      const uint64_t Offset = FixupAddress - FixupBF->getAddress();
      if (!FixupBF->getInstructionAtOffset(Offset))
        return createStringError(errc::executable_format_error,
                                 "no instruction at fixup address 0x%" PRIx64
                                 " in exception table",
                                 FixupAddress);
      // A mid-function fixup becomes a secondary entry point so that a label
      // exists for it; a fixup at offset 0 reuses the function's own symbol.
      FixupLabel = Offset ? FixupBF->addEntryPointAtOffset(Offset)
                          : FixupBF->getSymbol();
      if (Inst)
        BC.MIB->addAnnotation(*Inst, "Fixup", FixupLabel->getName());
      FunctionsWithExceptions.insert(FixupBF);
    }

    if (!FixupBF && opts::Verbosity) {
      BC.outs() << "BOLT-INFO: no function matches fixup code at 0x"
                << Twine::utohexstr(FixupAddress)
                << " referenced by Linux exception table\n";
    }
  }

  BC.outs() << "BOLT-INFO: parsed "
            << ExceptionsSection->getSize() / EXCEPTION_TABLE_ENTRY_SIZE
            << " exception table entries\n";

  return Error::success();
}
| |
| /// Depending on the value of CONFIG_BUILDTIME_TABLE_SORT, the kernel expects |
| /// the exception table to be sorted. Hence we have to sort it after code |
| /// reordering. |
| Error LinuxKernelRewriter::rewriteExceptionTable() { |
| // Disable output of functions with exceptions before rewrite support is |
| // added. |
| for (BinaryFunction *BF : FunctionsWithExceptions) |
| BF->setSimple(false); |
| |
| return Error::success(); |
| } |
| |
/// .parainstructions section contains information for patching paravirtual call
| /// instructions during runtime. The entries in the section are in the form: |
| /// |
| /// struct paravirt_patch_site { |
| /// u8 *instr; /* original instructions */ |
| /// u8 type; /* type of this instruction */ |
| /// u8 len; /* length of original instruction */ |
| /// }; |
| /// |
| /// Note that the structures are aligned at 8-byte boundary. |
| Error LinuxKernelRewriter::readParaInstructions() { |
| ParavirtualPatchSection = BC.getUniqueSectionByName(".parainstructions"); |
| if (!ParavirtualPatchSection) |
| return Error::success(); |
| |
| DataExtractor DE(ParavirtualPatchSection->getContents(), |
| BC.AsmInfo->isLittleEndian(), |
| BC.AsmInfo->getCodePointerSize()); |
| uint32_t EntryID = 0; |
| DataExtractor::Cursor Cursor(0); |
| while (Cursor && !DE.eof(Cursor)) { |
| const uint64_t NextOffset = alignTo(Cursor.tell(), Align(PARA_PATCH_ALIGN)); |
| if (!DE.isValidOffset(NextOffset)) |
| break; |
| |
| Cursor.seek(NextOffset); |
| |
| const uint64_t InstrLocation = DE.getU64(Cursor); |
| const uint8_t Type = DE.getU8(Cursor); |
| const uint8_t Len = DE.getU8(Cursor); |
| |
| if (!Cursor) |
| return createStringError( |
| errc::executable_format_error, |
| "out of bounds while reading .parainstructions: %s", |
| toString(Cursor.takeError()).c_str()); |
| |
| ++EntryID; |
| |
| if (opts::DumpParavirtualPatchSites) { |
| BC.outs() << "Paravirtual patch site: " << EntryID << '\n'; |
| BC.outs() << "\tInstr: 0x" << Twine::utohexstr(InstrLocation) |
| << "\n\tType: 0x" << Twine::utohexstr(Type) << "\n\tLen: 0x" |
| << Twine::utohexstr(Len) << '\n'; |
| } |
| |
| BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(InstrLocation); |
| if (!BF && opts::Verbosity) { |
| BC.outs() << "BOLT-INFO: no function matches address 0x" |
| << Twine::utohexstr(InstrLocation) |
| << " referenced by paravirutal patch site\n"; |
| } |
| |
| if (BF && BC.shouldEmit(*BF)) { |
| MCInst *Inst = |
| BF->getInstructionAtOffset(InstrLocation - BF->getAddress()); |
| if (!Inst) |
| return createStringError(errc::executable_format_error, |
| "no instruction at address 0x%" PRIx64 |
| " in paravirtual call site %d", |
| InstrLocation, EntryID); |
| BC.MIB->addAnnotation(*Inst, "ParaSite", EntryID); |
| } |
| } |
| |
| BC.outs() << "BOLT-INFO: parsed " << EntryID << " paravirtual patch sites\n"; |
| |
| return Error::success(); |
| } |
| |
| void LinuxKernelRewriter::skipFunctionsWithAnnotation( |
| StringRef Annotation) const { |
| for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { |
| if (!BC.shouldEmit(BF)) |
| continue; |
| for (const BinaryBasicBlock &BB : BF) { |
| const bool HasAnnotation = llvm::any_of(BB, [&](const MCInst &Inst) { |
| return BC.MIB->hasAnnotation(Inst, Annotation); |
| }); |
| if (HasAnnotation) { |
| BF.setSimple(false); |
| break; |
| } |
| } |
| } |
| } |
| |
Error LinuxKernelRewriter::rewriteParaInstructions() {
  // Disable output of functions with paravirtual instructions before the
  // rewrite support is complete. The "ParaSite" annotations are attached in
  // readParaInstructions().
  skipFunctionsWithAnnotation("ParaSite");

  return Error::success();
}
| |
| /// Process __bug_table section. |
| /// This section contains information useful for kernel debugging, mostly |
| /// utilized by WARN()/WARN_ON() macros and deprecated BUG()/BUG_ON(). |
| /// |
| /// Each entry in the section is a struct bug_entry that contains a pointer to |
| /// the ud2 instruction corresponding to the bug, corresponding file name (both |
| /// pointers use PC relative offset addressing), line number, and flags. |
| /// The definition of the struct bug_entry can be found in |
| /// `include/asm-generic/bug.h`. The first entry in the struct is an instruction |
| /// address encoded as a PC-relative offset. In theory, it could be an absolute |
| /// address if CONFIG_GENERIC_BUG_RELATIVE_POINTERS is not set, but in practice |
| /// the kernel code relies on it being a relative offset on x86-64. |
Error LinuxKernelRewriter::readBugTable() {
  BugTableSection = BC.getUniqueSectionByName("__bug_table");
  if (!BugTableSection)
    return Error::success();

  // The section must contain a whole number of bug_entry records.
  if (BugTableSection->getSize() % BUG_TABLE_ENTRY_SIZE)
    return createStringError(errc::executable_format_error,
                             "bug table size error");

  AddressExtractor AE(
      BugTableSection->getContents(), BugTableSection->getAddress(),
      BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
  AddressExtractor::Cursor Cursor(0);
  uint32_t EntryID = 0;
  while (Cursor && Cursor.tell() < BugTableSection->getSize()) {
    const uint64_t Pos = Cursor.tell();
    // Only the leading PC-relative instruction address is needed here; skip
    // past the remaining fields of the entry.
    const uint64_t InstAddress = AE.getPCRelAddress32(Cursor);
    Cursor.seek(Pos + BUG_TABLE_ENTRY_SIZE);

    if (!Cursor)
      return createStringError(errc::executable_format_error,
                               "out of bounds while reading __bug_table: %s",
                               toString(Cursor.takeError()).c_str());

    ++EntryID;

    BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(InstAddress);
    if (!BF && opts::Verbosity) {
      BC.outs() << "BOLT-INFO: no function matches address 0x"
                << Twine::utohexstr(InstAddress)
                << " referenced by bug table\n";
    }

    if (BF && BC.shouldEmit(*BF)) {
      MCInst *Inst = BF->getInstructionAtOffset(InstAddress - BF->getAddress());
      if (!Inst)
        return createStringError(errc::executable_format_error,
                                 "no instruction at address 0x%" PRIx64
                                 " referenced by bug table entry %d",
                                 InstAddress, EntryID);
      // Annotate the instruction and record the entry so the table can be
      // updated in rewriteBugTable().
      BC.MIB->addAnnotation(*Inst, "BugEntry", EntryID);

      FunctionBugList[BF].push_back(EntryID);
    }
  }

  BC.outs() << "BOLT-INFO: parsed " << EntryID << " bug table entries\n";

  return Error::success();
}
| |
| /// find_bug() uses linear search to match an address to an entry in the bug |
| /// table. Hence, there is no need to sort entries when rewriting the table. |
| /// When we need to erase an entry, we set its instruction address to zero. |
| Error LinuxKernelRewriter::rewriteBugTable() { |
| if (!BugTableSection) |
| return Error::success(); |
| |
| for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { |
| if (!BC.shouldEmit(BF)) |
| continue; |
| |
| if (!FunctionBugList.count(&BF)) |
| continue; |
| |
| // Bugs that will be emitted for this function. |
| DenseSet<uint32_t> EmittedIDs; |
| for (BinaryBasicBlock &BB : BF) { |
| for (MCInst &Inst : BB) { |
| if (!BC.MIB->hasAnnotation(Inst, "BugEntry")) |
| continue; |
| const uint32_t ID = BC.MIB->getAnnotationAs<uint32_t>(Inst, "BugEntry"); |
| EmittedIDs.insert(ID); |
| |
| // Create a relocation entry for this bug entry. |
| MCSymbol *Label = |
| BC.MIB->getOrCreateInstLabel(Inst, "__BUG_", BC.Ctx.get()); |
| const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE; |
| BugTableSection->addRelocation(EntryOffset, Label, ELF::R_X86_64_PC32, |
| /*Addend*/ 0); |
| } |
| } |
| |
| // Clear bug entries that were not emitted for this function, e.g. as a |
| // result of DCE, but setting their instruction address to zero. |
| for (const uint32_t ID : FunctionBugList[&BF]) { |
| if (!EmittedIDs.count(ID)) { |
| const uint64_t EntryOffset = (ID - 1) * BUG_TABLE_ENTRY_SIZE; |
| BugTableSection->addRelocation(EntryOffset, nullptr, ELF::R_X86_64_PC32, |
| /*Addend*/ 0); |
| } |
| } |
| } |
| |
| return Error::success(); |
| } |
| |
| /// The kernel can replace certain instruction sequences depending on hardware |
| /// it is running on and features specified during boot time. The information |
| /// about alternative instruction sequences is stored in .altinstructions |
| /// section. The format of entries in this section is defined in |
| /// arch/x86/include/asm/alternative.h: |
| /// |
| /// struct alt_instr { |
| /// s32 instr_offset; |
| /// s32 repl_offset; |
| /// uXX feature; |
| /// u8 instrlen; |
| /// u8 replacementlen; |
| /// u8 padlen; // present in older kernels |
| /// } __packed; |
| /// |
| /// Note that the structure is packed. |
| /// |
| /// Since the size of the "feature" field could be either u16 or u32, and |
| /// "padlen" presence is unknown, we attempt to parse .altinstructions section |
| /// using all possible combinations (four at this time). Since we validate the |
| /// contents of the section and its size, the detection works quite well. |
| /// Still, we leave the user the opportunity to specify these features on the |
| /// command line and skip the guesswork. |
Error LinuxKernelRewriter::readAltInstructions() {
  AltInstrSection = BC.getUniqueSectionByName(".altinstructions");
  if (!AltInstrSection)
    return Error::success();

  // Presence of "padlen" field.
  std::vector<bool> PadLenVariants;
  if (opts::AltInstHasPadLen.getNumOccurrences())
    PadLenVariants.push_back(opts::AltInstHasPadLen);
  else
    PadLenVariants = {false, true};

  // Size (in bytes) variants of "feature" field.
  std::vector<uint32_t> FeatureSizeVariants;
  if (opts::AltInstFeatureSize.getNumOccurrences())
    FeatureSizeVariants.push_back(opts::AltInstFeatureSize);
  else
    FeatureSizeVariants = {2, 4};

  // Probe each candidate format in parse-only mode until one successfully
  // parses the whole section.
  for (bool AltInstHasPadLen : PadLenVariants) {
    for (uint32_t AltInstFeatureSize : FeatureSizeVariants) {
      LLVM_DEBUG({
        dbgs() << "BOLT-DEBUG: trying AltInstHasPadLen = " << AltInstHasPadLen
               << "; AltInstFeatureSize = " << AltInstFeatureSize << ";\n";
      });
      if (Error E = tryReadAltInstructions(AltInstFeatureSize, AltInstHasPadLen,
                                           /*ParseOnly*/ true)) {
        // This combination failed - try the next one.
        consumeError(std::move(E));
        continue;
      }

      LLVM_DEBUG(dbgs() << "Matched .altinstructions format\n");

      // Report auto-detected values so the user can pass them explicitly and
      // skip the guesswork on subsequent runs.
      if (!opts::AltInstHasPadLen.getNumOccurrences())
        BC.outs() << "BOLT-INFO: setting --" << opts::AltInstHasPadLen.ArgStr
                  << '=' << AltInstHasPadLen << '\n';

      if (!opts::AltInstFeatureSize.getNumOccurrences())
        BC.outs() << "BOLT-INFO: setting --" << opts::AltInstFeatureSize.ArgStr
                  << '=' << AltInstFeatureSize << '\n';

      // Parse again for real using the detected format.
      return tryReadAltInstructions(AltInstFeatureSize, AltInstHasPadLen,
                                    /*ParseOnly*/ false);
    }
  }

  // We couldn't match the format. Read again to properly propagate the error
  // to the user.
  return tryReadAltInstructions(opts::AltInstFeatureSize,
                                opts::AltInstHasPadLen, /*ParseOnly*/ false);
}
| |
/// Parse .altinstructions assuming the given "feature" field size and
/// "padlen" presence. With \p ParseOnly set, the section contents are only
/// validated: instructions are not annotated and functions are not ignored.
Error LinuxKernelRewriter::tryReadAltInstructions(uint32_t AltInstFeatureSize,
                                                  bool AltInstHasPadLen,
                                                  bool ParseOnly) {
  AddressExtractor AE(
      AltInstrSection->getContents(), AltInstrSection->getAddress(),
      BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
  AddressExtractor::Cursor Cursor(0);
  uint64_t EntryID = 0;
  while (Cursor && !AE.eof(Cursor)) {
    // Fields of struct alt_instr - see the comment before
    // readAltInstructions() for the layout.
    const uint64_t OrgInstAddress = AE.getPCRelAddress32(Cursor);
    const uint64_t AltInstAddress = AE.getPCRelAddress32(Cursor);
    const uint64_t Feature = AE.getUnsigned(Cursor, AltInstFeatureSize);
    const uint8_t OrgSize = AE.getU8(Cursor);
    const uint8_t AltSize = AE.getU8(Cursor);

    // Older kernels may have the padlen field.
    const uint8_t PadLen = AltInstHasPadLen ? AE.getU8(Cursor) : 0;

    if (!Cursor)
      return createStringError(
          errc::executable_format_error,
          "out of bounds while reading .altinstructions: %s",
          toString(Cursor.takeError()).c_str());

    ++EntryID;

    if (opts::DumpAltInstructions) {
      BC.outs() << "Alternative instruction entry: " << EntryID
                << "\n\tOrg: 0x" << Twine::utohexstr(OrgInstAddress)
                << "\n\tAlt: 0x" << Twine::utohexstr(AltInstAddress)
                << "\n\tFeature: 0x" << Twine::utohexstr(Feature)
                << "\n\tOrgSize: " << (int)OrgSize
                << "\n\tAltSize: " << (int)AltSize << '\n';
      if (AltInstHasPadLen)
        BC.outs() << "\tPadLen: " << (int)PadLen << '\n';
    }

    // A replacement larger than the original sequence indicates we are
    // parsing with the wrong format assumptions.
    if (AltSize > OrgSize)
      return createStringError(errc::executable_format_error,
                               "error reading .altinstructions");

    BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(OrgInstAddress);
    if (!BF && opts::Verbosity) {
      BC.outs() << "BOLT-INFO: no function matches address 0x"
                << Twine::utohexstr(OrgInstAddress)
                << " of instruction from .altinstructions\n";
    }

    // Never emit alternative sequences themselves - the kernel patches code
    // by copying from their original locations.
    BinaryFunction *AltBF =
        BC.getBinaryFunctionContainingAddress(AltInstAddress);
    if (!ParseOnly && AltBF && BC.shouldEmit(*AltBF)) {
      BC.errs()
          << "BOLT-WARNING: alternative instruction sequence found in function "
          << *AltBF << '\n';
      AltBF->setIgnored();
    }

    if (!BF || !BF->hasInstructions())
      continue;

    // The original sequence must fit entirely within its function.
    if (OrgInstAddress + OrgSize > BF->getAddress() + BF->getSize())
      return createStringError(errc::executable_format_error,
                               "error reading .altinstructions");

    MCInst *Inst =
        BF->getInstructionAtOffset(OrgInstAddress - BF->getAddress());
    if (!Inst)
      return createStringError(errc::executable_format_error,
                               "no instruction at address 0x%" PRIx64
                               " referenced by .altinstructions entry %d",
                               OrgInstAddress, EntryID);

    if (ParseOnly)
      continue;

    // There could be more than one alternative instruction sequences for the
    // same original instruction. Annotate each alternative separately.
    std::string AnnotationName = "AltInst";
    unsigned N = 2;
    while (BC.MIB->hasAnnotation(*Inst, AnnotationName))
      AnnotationName = "AltInst" + std::to_string(N++);

    BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID);

    // Annotate all instructions from the original sequence. Note that it's not
    // the most efficient way to look for instructions in the address range,
    // but since alternative instructions are uncommon, it will do for now.
    for (uint32_t Offset = 1; Offset < OrgSize; ++Offset) {
      Inst = BF->getInstructionAtOffset(OrgInstAddress + Offset -
                                        BF->getAddress());
      if (Inst)
        BC.MIB->addAnnotation(*Inst, AnnotationName, EntryID);
    }
  }

  if (!ParseOnly)
    BC.outs() << "BOLT-INFO: parsed " << EntryID
              << " alternative instruction entries\n";

  return Error::success();
}
| |
void LinuxKernelRewriter::processAltInstructionsPostCFG() {
  // Disable optimization and output of functions with alt instructions before
  // the rewrite support is complete. Alt instructions can modify the control
  // flow, hence we may end up deleting seemingly unreachable code.
  // The "AltInst" annotation is attached in tryReadAltInstructions(); the
  // first alternative for an instruction always uses this exact name.
  skipFunctionsWithAnnotation("AltInst");
}
| |
| /// When the Linux kernel needs to handle an error associated with a given PCI |
| /// device, it uses a table stored in .pci_fixup section to locate a fixup code |
| /// specific to the vendor and the problematic device. The section contains a |
| /// list of the following structures defined in include/linux/pci.h: |
| /// |
| /// struct pci_fixup { |
| /// u16 vendor; /* Or PCI_ANY_ID */ |
| /// u16 device; /* Or PCI_ANY_ID */ |
| /// u32 class; /* Or PCI_ANY_ID */ |
| /// unsigned int class_shift; /* should be 0, 8, 16 */ |
| /// int hook_offset; |
| /// }; |
| /// |
| /// Normally, the hook will point to a function start and we don't have to |
| /// update the pointer if we are not relocating functions. Hence, while reading |
| /// the table we validate this assumption. If a function has a fixup code in the |
| /// middle of its body, we issue a warning and ignore it. |
Error LinuxKernelRewriter::readPCIFixupTable() {
  PCIFixupSection = BC.getUniqueSectionByName(".pci_fixup");
  if (!PCIFixupSection)
    return Error::success();

  // The section must contain a whole number of pci_fixup records.
  if (PCIFixupSection->getSize() % PCI_FIXUP_ENTRY_SIZE)
    return createStringError(errc::executable_format_error,
                             "PCI fixup table size error");

  AddressExtractor AE(
      PCIFixupSection->getContents(), PCIFixupSection->getAddress(),
      BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
  AddressExtractor::Cursor Cursor(0);
  uint64_t EntryID = 0;
  while (Cursor && !AE.eof(Cursor)) {
    // Fields of struct pci_fixup - see the comment above for the layout.
    const uint16_t Vendor = AE.getU16(Cursor);
    const uint16_t Device = AE.getU16(Cursor);
    const uint32_t Class = AE.getU32(Cursor);
    const uint32_t ClassShift = AE.getU32(Cursor);
    const uint64_t HookAddress = AE.getPCRelAddress32(Cursor);

    if (!Cursor)
      return createStringError(errc::executable_format_error,
                               "out of bounds while reading .pci_fixup: %s",
                               toString(Cursor.takeError()).c_str());

    ++EntryID;

    if (opts::DumpPCIFixups) {
      BC.outs() << "PCI fixup entry: " << EntryID << "\n\tVendor 0x"
                << Twine::utohexstr(Vendor) << "\n\tDevice: 0x"
                << Twine::utohexstr(Device) << "\n\tClass: 0x"
                << Twine::utohexstr(Class) << "\n\tClassShift: 0x"
                << Twine::utohexstr(ClassShift) << "\n\tHookAddress: 0x"
                << Twine::utohexstr(HookAddress) << '\n';
    }

    BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(HookAddress);
    if (!BF && opts::Verbosity) {
      BC.outs() << "BOLT-INFO: no function matches address 0x"
                << Twine::utohexstr(HookAddress)
                << " of hook from .pci_fixup\n";
    }

    if (!BF || !BC.shouldEmit(*BF))
      continue;

    // A hook pointing into the middle of a function would need its pointer
    // updated after reordering; until that is supported, keep such functions
    // in their original form.
    if (const uint64_t Offset = HookAddress - BF->getAddress()) {
      BC.errs() << "BOLT-WARNING: PCI fixup detected in the middle of function "
                << *BF << " at offset 0x" << Twine::utohexstr(Offset) << '\n';
      BF->setSimple(false);
    }
  }

  BC.outs() << "BOLT-INFO: parsed " << EntryID << " PCI fixup entries\n";

  return Error::success();
}
| |
| /// Runtime code modification used by static keys is the most ubiquitous |
| /// self-modifying feature of the Linux kernel. The idea is to eliminate the |
| /// condition check and associated conditional jump on a hot path if that |
| /// condition (based on a boolean value of a static key) does not change often. |
| /// Whenever the condition changes, the kernel runtime modifies all code paths |
| /// associated with that key flipping the code between nop and (unconditional) |
| /// jump. The information about the code is stored in a static key jump table |
| /// and contains the list of entries of the following type from |
| /// include/linux/jump_label.h: |
| // |
| /// struct jump_entry { |
| /// s32 code; |
| /// s32 target; |
| /// long key; // key may be far away from the core kernel under KASLR |
| /// }; |
| /// |
| /// The list does not have to be stored in any sorted way, but it is sorted at |
| /// boot time (or module initialization time) first by "key" and then by "code". |
| /// jump_label_sort_entries() is responsible for sorting the table. |
| /// |
| /// The key in jump_entry structure uses lower two bits of the key address |
| /// (which itself is aligned) to store extra information. We are interested in |
| /// the lower bit which indicates if the key is likely to be set on the code |
| /// path associated with this jump_entry. |
| /// |
| /// static_key_{enable,disable}() functions modify the code based on key and |
| /// jump table entries. |
| /// |
| /// jump_label_update() updates all code entries for a given key. Batch mode is |
| /// used for x86. |
| /// |
| /// The actual patching happens in text_poke_bp_batch() that overrides the first |
| /// byte of the sequence with int3 before proceeding with actual code |
| /// replacement. |
| Error LinuxKernelRewriter::readStaticKeysJumpTable() { |
| const BinaryData *StaticKeysJumpTable = |
| BC.getBinaryDataByName("__start___jump_table"); |
| if (!StaticKeysJumpTable) |
| return Error::success(); |
| |
| StaticKeysJumpTableAddress = StaticKeysJumpTable->getAddress(); |
| |
| const BinaryData *Stop = BC.getBinaryDataByName("__stop___jump_table"); |
| if (!Stop) |
| return createStringError(errc::executable_format_error, |
| "missing __stop___jump_table symbol"); |
| |
| ErrorOr<BinarySection &> ErrorOrSection = |
| BC.getSectionForAddress(StaticKeysJumpTableAddress); |
| if (!ErrorOrSection) |
| return createStringError(errc::executable_format_error, |
| "no section matching __start___jump_table"); |
| |
| StaticKeysJumpSection = *ErrorOrSection; |
| if (!StaticKeysJumpSection->containsAddress(Stop->getAddress() - 1)) |
| return createStringError(errc::executable_format_error, |
| "__stop___jump_table not in the same section " |
| "as __start___jump_table"); |
| |
| if ((Stop->getAddress() - StaticKeysJumpTableAddress) % |
| STATIC_KEYS_JUMP_ENTRY_SIZE) |
| return createStringError(errc::executable_format_error, |
| "static keys jump table size error"); |
| |
| const uint64_t SectionAddress = StaticKeysJumpSection->getAddress(); |
| AddressExtractor AE(StaticKeysJumpSection->getContents(), SectionAddress, |
| BC.AsmInfo->isLittleEndian(), |
| BC.AsmInfo->getCodePointerSize()); |
| AddressExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress); |
| uint32_t EntryID = 0; |
| while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) { |
| const uint64_t JumpAddress = AE.getPCRelAddress32(Cursor); |
| const uint64_t TargetAddress = AE.getPCRelAddress32(Cursor); |
| const uint64_t KeyAddress = AE.getPCRelAddress64(Cursor); |
| |
| // Consume the status of the cursor. |
| if (!Cursor) |
| return createStringError( |
| errc::executable_format_error, |
| "out of bounds while reading static keys jump table: %s", |
| toString(Cursor.takeError()).c_str()); |
| |
| ++EntryID; |
| |
| JumpInfo.push_back(JumpInfoEntry()); |
| JumpInfoEntry &Info = JumpInfo.back(); |
| Info.Likely = KeyAddress & 1; |
| |
| if (opts::DumpStaticKeys) { |
| BC.outs() << "Static key jump entry: " << EntryID |
| << "\n\tJumpAddress: 0x" << Twine::utohexstr(JumpAddress) |
| << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress) |
| << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress) |
| << "\n\tIsLikely: " << Info.Likely << '\n'; |
| } |
| |
| BinaryFunction *BF = BC.getBinaryFunctionContainingAddress(JumpAddress); |
| if (!BF && opts::Verbosity) { |
| BC.outs() |
| << "BOLT-INFO: no function matches address 0x" |
| << Twine::utohexstr(JumpAddress) |
| << " of jump instruction referenced from static keys jump table\n"; |
| } |
| |
| if (!BF || !BC.shouldEmit(*BF)) |
| continue; |
| |
| MCInst *Inst = BF->getInstructionAtOffset(JumpAddress - BF->getAddress()); |
| if (!Inst) |
| return createStringError( |
| errc::executable_format_error, |
| "no instruction at static keys jump site address 0x%" PRIx64, |
| JumpAddress); |
| |
| if (!BF->containsAddress(TargetAddress)) |
| return createStringError( |
| errc::executable_format_error, |
| "invalid target of static keys jump at 0x%" PRIx64 " : 0x%" PRIx64, |
| JumpAddress, TargetAddress); |
| |
| const bool IsBranch = BC.MIB->isBranch(*Inst); |
| if (!IsBranch && !BC.MIB->isNoop(*Inst)) |
| return createStringError(errc::executable_format_error, |
| "jump or nop expected at address 0x%" PRIx64, |
| JumpAddress); |
| |
| const uint64_t Size = BC.computeInstructionSize(*Inst); |
| if (Size != 2 && Size != 5) { |
| return createStringError( |
| errc::executable_format_error, |
| "unexpected static keys jump size at address 0x%" PRIx64, |
| JumpAddress); |
| } |
| |
| MCSymbol *Target = BF->registerBranch(JumpAddress, TargetAddress); |
| MCInst StaticKeyBranch; |
| |
| // Create a conditional branch instruction. The actual conditional code type |
| // should not matter as long as it's a valid code. The instruction should be |
| // treated as a conditional branch for control-flow purposes. Before we emit |
| // the code, it will be converted to a different instruction in |
| // rewriteStaticKeysJumpTable(). |
| // |
| // NB: for older kernels, under LongJumpLabels option, we create long |
| // conditional branch to guarantee that code size estimation takes |
| // into account the extra bytes needed for long branch that will be used |
| // by the kernel patching code. Newer kernels can work with both short |
| // and long branches. The code for long conditional branch is larger |
| // than unconditional one, so we are pessimistic in our estimations. |
| if (opts::LongJumpLabels) |
| BC.MIB->createLongCondBranch(StaticKeyBranch, Target, 0, BC.Ctx.get()); |
| else |
| BC.MIB->createCondBranch(StaticKeyBranch, Target, 0, BC.Ctx.get()); |
| BC.MIB->moveAnnotations(std::move(*Inst), StaticKeyBranch); |
| BC.MIB->setDynamicBranch(StaticKeyBranch, EntryID); |
| *Inst = StaticKeyBranch; |
| |
| // IsBranch = InitialValue ^ LIKELY |
| // |
| // 0 0 0 |
| // 1 0 1 |
| // 1 1 0 |
| // 0 1 1 |
| // |
| // => InitialValue = IsBranch ^ LIKELY |
| Info.InitValue = IsBranch ^ Info.Likely; |
| |
| // Add annotations to facilitate manual code analysis. |
| BC.MIB->addAnnotation(*Inst, "Likely", Info.Likely); |
| BC.MIB->addAnnotation(*Inst, "InitValue", Info.InitValue); |
| if (!BC.MIB->getSize(*Inst)) |
| BC.MIB->setSize(*Inst, Size); |
| |
| if (!BC.MIB->getOffset(*Inst)) |
| BC.MIB->setOffset(*Inst, JumpAddress - BF->getAddress()); |
| |
| if (opts::LongJumpLabels) |
| BC.MIB->setSize(*Inst, 5); |
| } |
| |
| BC.outs() << "BOLT-INFO: parsed " << EntryID << " static keys jump entries\n"; |
| |
| return Error::success(); |
| } |
| |
| // Pre-emit pass. Convert dynamic branch instructions into jumps that could be |
| // relaxed. In post-emit pass we will convert those jumps into nops when |
| // necessary. We do the unconditional conversion into jumps so that the jumps |
| // can be relaxed and the optimal size of jump/nop instruction is selected. |
| Error LinuxKernelRewriter::rewriteStaticKeysJumpTable() { |
| if (!StaticKeysJumpSection) |
| return Error::success(); |
| |
| uint64_t NumShort = 0; |
| uint64_t NumLong = 0; |
| for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { |
| if (!BC.shouldEmit(BF)) |
| continue; |
| |
| for (BinaryBasicBlock &BB : BF) { |
| for (MCInst &Inst : BB) { |
| if (!BC.MIB->isDynamicBranch(Inst)) |
| continue; |
| |
| const uint32_t EntryID = *BC.MIB->getDynamicBranchID(Inst); |
| MCSymbol *Target = |
| const_cast<MCSymbol *>(BC.MIB->getTargetSymbol(Inst)); |
| assert(Target && "Target symbol should be set."); |
| |
| const JumpInfoEntry &Info = JumpInfo[EntryID - 1]; |
| const bool IsBranch = Info.Likely ^ Info.InitValue; |
| |
| uint32_t Size = *BC.MIB->getSize(Inst); |
| if (Size == 2) |
| ++NumShort; |
| else if (Size == 5) |
| ++NumLong; |
| else |
| llvm_unreachable("Wrong size for static keys jump instruction."); |
| |
| MCInst NewInst; |
| // Replace the instruction with unconditional jump even if it needs to |
| // be nop in the binary. |
| if (opts::LongJumpLabels) { |
| BC.MIB->createLongUncondBranch(NewInst, Target, BC.Ctx.get()); |
| } else { |
| // Newer kernels can handle short and long jumps for static keys. |
| // Optimistically, emit short jump and check if it gets relaxed into |
| // a long one during post-emit. Only then convert the jump to a nop. |
| BC.MIB->createUncondBranch(NewInst, Target, BC.Ctx.get()); |
| } |
| |
| BC.MIB->moveAnnotations(std::move(Inst), NewInst); |
| Inst = NewInst; |
| |
| // Mark the instruction for nop conversion. |
| if (!IsBranch) |
| NopIDs.insert(EntryID); |
| |
| MCSymbol *Label = |
| BC.MIB->getOrCreateInstLabel(Inst, "__SK_", BC.Ctx.get()); |
| |
| // Create a relocation against the label. |
| const uint64_t EntryOffset = StaticKeysJumpTableAddress - |
| StaticKeysJumpSection->getAddress() + |
| (EntryID - 1) * 16; |
| StaticKeysJumpSection->addRelocation(EntryOffset, Label, |
| ELF::R_X86_64_PC32, |
| /*Addend*/ 0); |
| StaticKeysJumpSection->addRelocation(EntryOffset + 4, Target, |
| ELF::R_X86_64_PC32, /*Addend*/ 0); |
| } |
| } |
| } |
| |
| BC.outs() << "BOLT-INFO: the input contains " << NumShort << " short and " |
| << NumLong << " long static keys jumps in optimized functions\n"; |
| |
| return Error::success(); |
| } |
| |
| // Post-emit pass of static keys jump section. Convert jumps to nops. |
| Error LinuxKernelRewriter::updateStaticKeysJumpTablePostEmit() { |
| if (!StaticKeysJumpSection || !StaticKeysJumpSection->isFinalized()) |
| return Error::success(); |
| |
| const uint64_t SectionAddress = StaticKeysJumpSection->getAddress(); |
| AddressExtractor AE(StaticKeysJumpSection->getOutputContents(), |
| SectionAddress, BC.AsmInfo->isLittleEndian(), |
| BC.AsmInfo->getCodePointerSize()); |
| AddressExtractor::Cursor Cursor(StaticKeysJumpTableAddress - SectionAddress); |
| const BinaryData *Stop = BC.getBinaryDataByName("__stop___jump_table"); |
| uint32_t EntryID = 0; |
| uint64_t NumShort = 0; |
| uint64_t NumLong = 0; |
| while (Cursor && Cursor.tell() < Stop->getAddress() - SectionAddress) { |
| const uint64_t JumpAddress = AE.getPCRelAddress32(Cursor); |
| const uint64_t TargetAddress = AE.getPCRelAddress32(Cursor); |
| const uint64_t KeyAddress = AE.getPCRelAddress64(Cursor); |
| |
| // Consume the status of the cursor. |
| if (!Cursor) |
| return createStringError(errc::executable_format_error, |
| "out of bounds while updating static keys: %s", |
| toString(Cursor.takeError()).c_str()); |
| |
| ++EntryID; |
| |
| LLVM_DEBUG({ |
| dbgs() << "\n\tJumpAddress: 0x" << Twine::utohexstr(JumpAddress) |
| << "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress) |
| << "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress) << '\n'; |
| }); |
| (void)TargetAddress; |
| (void)KeyAddress; |
| |
| BinaryFunction *BF = |
| BC.getBinaryFunctionContainingAddress(JumpAddress, |
| /*CheckPastEnd*/ false, |
| /*UseMaxSize*/ true); |
| assert(BF && "Cannot get function for modified static key."); |
| |
| if (!BF->isEmitted()) |
| continue; |
| |
| // Disassemble instruction to collect stats even if nop-conversion is |
| // unnecessary. |
| MutableArrayRef<uint8_t> Contents = MutableArrayRef<uint8_t>( |
| reinterpret_cast<uint8_t *>(BF->getImageAddress()), BF->getImageSize()); |
| assert(Contents.size() && "Non-empty function image expected."); |
| |
| MCInst Inst; |
| uint64_t Size; |
| const uint64_t JumpOffset = JumpAddress - BF->getAddress(); |
| if (!BC.DisAsm->getInstruction(Inst, Size, Contents.slice(JumpOffset), 0, |
| nulls())) { |
| llvm_unreachable("Unable to disassemble jump instruction."); |
| } |
| assert(BC.MIB->isBranch(Inst) && "Branch instruction expected."); |
| |
| if (Size == 2) |
| ++NumShort; |
| else if (Size == 5) |
| ++NumLong; |
| else |
| llvm_unreachable("Unexpected size for static keys jump instruction."); |
| |
| // Check if we need to convert jump instruction into a nop. |
| if (!NopIDs.contains(EntryID)) |
| continue; |
| |
| SmallString<15> NopCode; |
| raw_svector_ostream VecOS(NopCode); |
| BC.MAB->writeNopData(VecOS, Size, BC.STI.get()); |
| for (uint64_t I = 0; I < Size; ++I) |
| Contents[JumpOffset + I] = NopCode[I]; |
| } |
| |
| BC.outs() << "BOLT-INFO: written " << NumShort << " short and " << NumLong |
| << " long static keys jumps in optimized functions\n"; |
| |
| return Error::success(); |
| } |
| |
| } // namespace |
| |
| std::unique_ptr<MetadataRewriter> |
| llvm::bolt::createLinuxKernelRewriter(BinaryContext &BC) { |
| return std::make_unique<LinuxKernelRewriter>(BC); |
| } |