| //===- ICF.cpp ------------------------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "ICF.h" |
| #include "ConcatOutputSection.h" |
| #include "Config.h" |
| #include "InputSection.h" |
| #include "SymbolTable.h" |
| #include "Symbols.h" |
| #include "UnwindInfoSection.h" |
| |
| #include "lld/Common/CommonLinkerContext.h" |
| #include "llvm/Support/LEB128.h" |
| #include "llvm/Support/Parallel.h" |
| #include "llvm/Support/TimeProfiler.h" |
| #include "llvm/Support/xxhash.h" |
| |
| #include <atomic> |
| |
| using namespace llvm; |
| using namespace lld; |
| using namespace lld::macho; |
| |
| static constexpr bool verboseDiagnostics = false; |
| |
| class ICF { |
| public: |
| ICF(std::vector<ConcatInputSection *> &inputs); |
| void run(); |
| |
| using EqualsFn = bool (ICF::*)(const ConcatInputSection *, |
| const ConcatInputSection *); |
| void segregate(size_t begin, size_t end, EqualsFn); |
| size_t findBoundary(size_t begin, size_t end); |
| void forEachClassRange(size_t begin, size_t end, |
| llvm::function_ref<void(size_t, size_t)> func); |
| void forEachClass(llvm::function_ref<void(size_t, size_t)> func); |
| |
| bool equalsConstant(const ConcatInputSection *ia, |
| const ConcatInputSection *ib); |
| bool equalsVariable(const ConcatInputSection *ia, |
| const ConcatInputSection *ib); |
| |
| // ICF needs a copy of the inputs vector because its equivalence-class |
| // segregation algorithm destroys the proper sequence. |
| std::vector<ConcatInputSection *> icfInputs; |
| |
| unsigned icfPass = 0; |
| std::atomic<bool> icfRepeat{false}; |
| std::atomic<uint64_t> equalsConstantCount{0}; |
| std::atomic<uint64_t> equalsVariableCount{0}; |
| }; |
| |
| ICF::ICF(std::vector<ConcatInputSection *> &inputs) { |
| icfInputs.assign(inputs.begin(), inputs.end()); |
| } |
| |
| // ICF = Identical Code Folding |
| // |
| // We only fold __TEXT,__text, so this is really "code" folding, and not |
| // "COMDAT" folding. String and scalar constant literals are deduplicated |
| // elsewhere. |
| // |
| // Summary of segments & sections: |
| // |
| // The __TEXT segment is readonly at the MMU. Some sections are already |
| // deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are |
| // synthetic and inherently free of duplicates (__TEXT,__stubs & |
| // __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const, |
| // because doing so induces many test failures. |
| // |
| // The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and |
| // thus ineligible for ICF. |
| // |
| // The __DATA_CONST segment is read/write at the MMU, but is logically const to |
| // the application after dyld applies fixups to pointer data. We currently |
| // fold only the __DATA_CONST,__cfstring section. |
| // |
| // The __DATA segment is read/write at the MMU, and as application-writeable |
| // data, none of its sections are eligible for ICF. |
| // |
| // Please see the large block comment in lld/ELF/ICF.cpp for an explanation |
| // of the segregation algorithm. |
| // |
| // FIXME(gkm): implement keep-unique attributes |
| // FIXME(gkm): implement address-significance tables for MachO object files |
| |
| // Compare "non-moving" parts of two ConcatInputSections, namely everything |
| // except references to other ConcatInputSections. |
| bool ICF::equalsConstant(const ConcatInputSection *ia, |
| const ConcatInputSection *ib) { |
| if (verboseDiagnostics) |
| ++equalsConstantCount; |
| // We can only fold within the same OutputSection. |
| if (ia->parent != ib->parent) |
| return false; |
| if (ia->data.size() != ib->data.size()) |
| return false; |
| if (ia->data != ib->data) |
| return false; |
| if (ia->relocs.size() != ib->relocs.size()) |
| return false; |
| auto f = [](const Reloc &ra, const Reloc &rb) { |
| if (ra.type != rb.type) |
| return false; |
| if (ra.pcrel != rb.pcrel) |
| return false; |
| if (ra.length != rb.length) |
| return false; |
| if (ra.offset != rb.offset) |
| return false; |
| if (ra.referent.is<Symbol *>() != rb.referent.is<Symbol *>()) |
| return false; |
| |
| InputSection *isecA, *isecB; |
| |
| uint64_t valueA = 0; |
| uint64_t valueB = 0; |
| if (ra.referent.is<Symbol *>()) { |
| const auto *sa = ra.referent.get<Symbol *>(); |
| const auto *sb = rb.referent.get<Symbol *>(); |
| if (sa->kind() != sb->kind()) |
| return false; |
| // ICF runs before Undefineds are treated (and potentially converted into |
| // DylibSymbols). |
| if (isa<DylibSymbol>(sa) || isa<Undefined>(sa)) |
| return sa == sb && ra.addend == rb.addend; |
| assert(isa<Defined>(sa)); |
| const auto *da = cast<Defined>(sa); |
| const auto *db = cast<Defined>(sb); |
| if (!da->isec() || !db->isec()) { |
| assert(da->isAbsolute() && db->isAbsolute()); |
| return da->value + ra.addend == db->value + rb.addend; |
| } |
| isecA = da->isec(); |
| valueA = da->value; |
| isecB = db->isec(); |
| valueB = db->value; |
| } else { |
| isecA = ra.referent.get<InputSection *>(); |
| isecB = rb.referent.get<InputSection *>(); |
| } |
| |
| if (isecA->parent != isecB->parent) |
| return false; |
| // Sections with identical parents should be of the same kind. |
| assert(isecA->kind() == isecB->kind()); |
| // We will compare ConcatInputSection contents in equalsVariable. |
| if (isa<ConcatInputSection>(isecA)) |
| return ra.addend == rb.addend; |
| // Else we have two literal sections. References to them are equal iff their |
| // offsets in the output section are equal. |
| if (ra.referent.is<Symbol *>()) |
| // For symbol relocs, we compare the contents at the symbol address. We |
| // don't do `getOffset(value + addend)` because value + addend may not be |
| // a valid offset in the literal section. |
| return isecA->getOffset(valueA) == isecB->getOffset(valueB) && |
| ra.addend == rb.addend; |
| else { |
| assert(valueA == 0 && valueB == 0); |
| // For section relocs, we compare the content at the section offset. |
| return isecA->getOffset(ra.addend) == isecB->getOffset(rb.addend); |
| } |
| }; |
| return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), |
| f); |
| } |
| |
| // Compare the "moving" parts of two ConcatInputSections -- i.e. everything not |
| // handled by equalsConstant(). |
| bool ICF::equalsVariable(const ConcatInputSection *ia, |
| const ConcatInputSection *ib) { |
| if (verboseDiagnostics) |
| ++equalsVariableCount; |
| assert(ia->relocs.size() == ib->relocs.size()); |
| auto f = [this](const Reloc &ra, const Reloc &rb) { |
| // We already filtered out mismatching values/addends in equalsConstant. |
| if (ra.referent == rb.referent) |
| return true; |
| const ConcatInputSection *isecA, *isecB; |
| if (ra.referent.is<Symbol *>()) { |
| // Matching DylibSymbols are already filtered out by the |
| // identical-referent check above. Non-matching DylibSymbols were filtered |
| // out in equalsConstant(). So we can safely cast to Defined here. |
| const auto *da = cast<Defined>(ra.referent.get<Symbol *>()); |
| const auto *db = cast<Defined>(rb.referent.get<Symbol *>()); |
| if (da->isAbsolute()) |
| return true; |
| isecA = dyn_cast<ConcatInputSection>(da->isec()); |
| if (!isecA) |
| return true; // literal sections were checked in equalsConstant. |
| isecB = cast<ConcatInputSection>(db->isec()); |
| } else { |
| const auto *sa = ra.referent.get<InputSection *>(); |
| const auto *sb = rb.referent.get<InputSection *>(); |
| isecA = dyn_cast<ConcatInputSection>(sa); |
| if (!isecA) |
| return true; |
| isecB = cast<ConcatInputSection>(sb); |
| } |
| return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2]; |
| }; |
| if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f)) |
| return false; |
| |
| // If there are symbols with associated unwind info, check that the unwind |
| // info matches. For simplicity, we only handle the case where there are only |
| // symbols at offset zero within the section (which is typically the case with |
| // .subsections_via_symbols.) |
| auto hasUnwind = [](Defined *d) { return d->unwindEntry() != nullptr; }; |
| const auto *itA = llvm::find_if(ia->symbols, hasUnwind); |
| const auto *itB = llvm::find_if(ib->symbols, hasUnwind); |
| if (itA == ia->symbols.end()) |
| return itB == ib->symbols.end(); |
| if (itB == ib->symbols.end()) |
| return false; |
| const Defined *da = *itA; |
| const Defined *db = *itB; |
| if (da->unwindEntry()->icfEqClass[icfPass % 2] != |
| db->unwindEntry()->icfEqClass[icfPass % 2] || |
| da->value != 0 || db->value != 0) |
| return false; |
| auto isZero = [](Defined *d) { return d->value == 0; }; |
| return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) == |
| ia->symbols.end() && |
| std::find_if_not(std::next(itB), ib->symbols.end(), isZero) == |
| ib->symbols.end(); |
| } |
| |
| // Find the first InputSection after BEGIN whose equivalence class differs |
| size_t ICF::findBoundary(size_t begin, size_t end) { |
| uint64_t beginHash = icfInputs[begin]->icfEqClass[icfPass % 2]; |
| for (size_t i = begin + 1; i < end; ++i) |
| if (beginHash != icfInputs[i]->icfEqClass[icfPass % 2]) |
| return i; |
| return end; |
| } |
| |
| // Invoke FUNC on subranges with matching equivalence class |
| void ICF::forEachClassRange(size_t begin, size_t end, |
| llvm::function_ref<void(size_t, size_t)> func) { |
| while (begin < end) { |
| size_t mid = findBoundary(begin, end); |
| func(begin, mid); |
| begin = mid; |
| } |
| } |
| |
| // Split icfInputs into shards, then parallelize invocation of FUNC on subranges |
| // with matching equivalence class |
| void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) { |
| // Only use threads when the benefits outweigh the overhead. |
| const size_t threadingThreshold = 1024; |
| if (icfInputs.size() < threadingThreshold) { |
| forEachClassRange(0, icfInputs.size(), func); |
| ++icfPass; |
| return; |
| } |
| |
| // Shard into non-overlapping intervals, and call FUNC in parallel. The |
| // sharding must be completed before any calls to FUNC are made so that FUNC |
| // can modify the InputSection in its shard without causing data races. |
| const size_t shards = 256; |
| size_t step = icfInputs.size() / shards; |
| size_t boundaries[shards + 1]; |
| boundaries[0] = 0; |
| boundaries[shards] = icfInputs.size(); |
| parallelFor(1, shards, [&](size_t i) { |
| boundaries[i] = findBoundary((i - 1) * step, icfInputs.size()); |
| }); |
| parallelFor(1, shards + 1, [&](size_t i) { |
| if (boundaries[i - 1] < boundaries[i]) { |
| forEachClassRange(boundaries[i - 1], boundaries[i], func); |
| } |
| }); |
| ++icfPass; |
| } |
| |
| void ICF::run() { |
| // Into each origin-section hash, combine all reloc referent section hashes. |
| for (icfPass = 0; icfPass < 2; ++icfPass) { |
| parallelForEach(icfInputs, [&](ConcatInputSection *isec) { |
| uint32_t hash = isec->icfEqClass[icfPass % 2]; |
| for (const Reloc &r : isec->relocs) { |
| if (auto *sym = r.referent.dyn_cast<Symbol *>()) { |
| if (auto *defined = dyn_cast<Defined>(sym)) { |
| if (defined->isec()) { |
| if (auto *referentIsec = |
| dyn_cast<ConcatInputSection>(defined->isec())) |
| hash += defined->value + referentIsec->icfEqClass[icfPass % 2]; |
| else |
| hash += defined->isec()->kind() + |
| defined->isec()->getOffset(defined->value); |
| } else { |
| hash += defined->value; |
| } |
| } else { |
| // ICF runs before Undefined diags |
| assert(isa<Undefined>(sym) || isa<DylibSymbol>(sym)); |
| } |
| } |
| } |
| // Set MSB to 1 to avoid collisions with non-hashed classes. |
| isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 31); |
| }); |
| } |
| |
| llvm::stable_sort( |
| icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) { |
| return a->icfEqClass[0] < b->icfEqClass[0]; |
| }); |
| forEachClass([&](size_t begin, size_t end) { |
| segregate(begin, end, &ICF::equalsConstant); |
| }); |
| |
| // Split equivalence groups by comparing relocations until convergence |
| do { |
| icfRepeat = false; |
| forEachClass([&](size_t begin, size_t end) { |
| segregate(begin, end, &ICF::equalsVariable); |
| }); |
| } while (icfRepeat); |
| log("ICF needed " + Twine(icfPass) + " iterations"); |
| if (verboseDiagnostics) { |
| log("equalsConstant() called " + Twine(equalsConstantCount) + " times"); |
| log("equalsVariable() called " + Twine(equalsVariableCount) + " times"); |
| } |
| |
| // Fold sections within equivalence classes |
| forEachClass([&](size_t begin, size_t end) { |
| if (end - begin < 2) |
| return; |
| ConcatInputSection *beginIsec = icfInputs[begin]; |
| for (size_t i = begin + 1; i < end; ++i) |
| beginIsec->foldIdentical(icfInputs[i]); |
| }); |
| } |
| |
| // Split an equivalence class into smaller classes. |
| void ICF::segregate(size_t begin, size_t end, EqualsFn equals) { |
| while (begin < end) { |
| // Divide [begin, end) into two. Let mid be the start index of the |
| // second group. |
| auto bound = std::stable_partition( |
| icfInputs.begin() + begin + 1, icfInputs.begin() + end, |
| [&](ConcatInputSection *isec) { |
| return (this->*equals)(icfInputs[begin], isec); |
| }); |
| size_t mid = bound - icfInputs.begin(); |
| |
| // Split [begin, end) into [begin, mid) and [mid, end). We use mid as an |
| // equivalence class ID because every group ends with a unique index. |
| for (size_t i = begin; i < mid; ++i) |
| icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = mid; |
| |
| // If we created a group, we need to iterate the main loop again. |
| if (mid != end) |
| icfRepeat = true; |
| |
| begin = mid; |
| } |
| } |
| |
| void macho::markSymAsAddrSig(Symbol *s) { |
| if (auto *d = dyn_cast_or_null<Defined>(s)) |
| if (d->isec()) |
| d->isec()->keepUnique = true; |
| } |
| |
| void macho::markAddrSigSymbols() { |
| TimeTraceScope timeScope("Mark addrsig symbols"); |
| for (InputFile *file : inputFiles) { |
| ObjFile *obj = dyn_cast<ObjFile>(file); |
| if (!obj) |
| continue; |
| |
| Section *addrSigSection = obj->addrSigSection; |
| if (!addrSigSection) |
| continue; |
| assert(addrSigSection->subsections.size() == 1); |
| |
| const InputSection *isec = addrSigSection->subsections[0].isec; |
| |
| for (const Reloc &r : isec->relocs) { |
| if (auto *sym = r.referent.dyn_cast<Symbol *>()) |
| markSymAsAddrSig(sym); |
| else |
| error(toString(isec) + ": unexpected section relocation"); |
| } |
| } |
| } |
| |
| void macho::foldIdenticalSections(bool onlyCfStrings) { |
| TimeTraceScope timeScope("Fold Identical Code Sections"); |
| // The ICF equivalence-class segregation algorithm relies on pre-computed |
| // hashes of InputSection::data for the ConcatOutputSection::inputs and all |
| // sections referenced by their relocs. We could recursively traverse the |
| // relocs to find every referenced InputSection, but that precludes easy |
| // parallelization. Therefore, we hash every InputSection here where we have |
| // them all accessible as simple vectors. |
| |
| // If an InputSection is ineligible for ICF, we give it a unique ID to force |
| // it into an unfoldable singleton equivalence class. Begin the unique-ID |
| // space at inputSections.size(), so that it will never intersect with |
| // equivalence-class IDs which begin at 0. Since hashes & unique IDs never |
| // coexist with equivalence-class IDs, this is not necessary, but might help |
| // someone keep the numbers straight in case we ever need to debug the |
| // ICF::segregate() |
| std::vector<ConcatInputSection *> foldable; |
| uint64_t icfUniqueID = inputSections.size(); |
| for (ConcatInputSection *isec : inputSections) { |
| bool isFoldableWithAddendsRemoved = isCfStringSection(isec) || |
| isClassRefsSection(isec) || |
| isSelRefsSection(isec); |
| // NOTE: __objc_selrefs is typically marked as no_dead_strip by MC, but we |
| // can still fold it. |
| bool hasFoldableFlags = (isSelRefsSection(isec) || |
| sectionType(isec->getFlags()) == MachO::S_REGULAR); |
| // FIXME: consider non-code __text sections as foldable? |
| bool isFoldable = (!onlyCfStrings || isCfStringSection(isec)) && |
| (isCodeSection(isec) || isFoldableWithAddendsRemoved || |
| isGccExceptTabSection(isec)) && |
| !isec->keepUnique && !isec->hasAltEntry && |
| !isec->shouldOmitFromOutput() && hasFoldableFlags; |
| if (isFoldable) { |
| foldable.push_back(isec); |
| for (Defined *d : isec->symbols) |
| if (d->unwindEntry()) |
| foldable.push_back(d->unwindEntry()); |
| |
| // Some sections have embedded addends that foil ICF's hashing / equality |
| // checks. (We can ignore embedded addends when doing ICF because the same |
| // information gets recorded in our Reloc structs.) We therefore create a |
| // mutable copy of the section data and zero out the embedded addends |
| // before performing any hashing / equality checks. |
| if (isFoldableWithAddendsRemoved) { |
| // We have to do this copying serially as the BumpPtrAllocator is not |
| // thread-safe. FIXME: Make a thread-safe allocator. |
| MutableArrayRef<uint8_t> copy = isec->data.copy(bAlloc()); |
| for (const Reloc &r : isec->relocs) |
| target->relocateOne(copy.data() + r.offset, r, /*va=*/0, |
| /*relocVA=*/0); |
| isec->data = copy; |
| } |
| } else if (!isEhFrameSection(isec)) { |
| // EH frames are gathered as foldables from unwindEntry above; give a |
| // unique ID to everything else. |
| isec->icfEqClass[0] = ++icfUniqueID; |
| } |
| } |
| parallelForEach(foldable, [](ConcatInputSection *isec) { |
| assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID! |
| // Turn-on the top bit to guarantee that valid hashes have no collisions |
| // with the small-integer unique IDs for ICF-ineligible sections |
| isec->icfEqClass[0] = xxh3_64bits(isec->data) | (1ull << 31); |
| }); |
| // Now that every input section is either hashed or marked as unique, run the |
| // segregation algorithm to detect foldable subsections. |
| ICF(foldable).run(); |
| } |