1//===- ICF.cpp ------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ICF.h"
10#include "ConcatOutputSection.h"
11#include "Config.h"
12#include "InputSection.h"
13#include "SymbolTable.h"
14#include "Symbols.h"
15#include "UnwindInfoSection.h"
16
17#include "lld/Common/CommonLinkerContext.h"
18#include "llvm/Support/LEB128.h"
19#include "llvm/Support/Parallel.h"
20#include "llvm/Support/TimeProfiler.h"
21#include "llvm/Support/xxhash.h"
22
23#include <atomic>
24
25using namespace llvm;
26using namespace lld;
27using namespace lld::macho;
28
29static constexpr bool verboseDiagnostics = false;
30
31class ICF {
32public:
33  ICF(std::vector<ConcatInputSection *> &inputs);
34  void run();
35
36  using EqualsFn = bool (ICF::*)(const ConcatInputSection *,
37                                 const ConcatInputSection *);
38  void segregate(size_t begin, size_t end, EqualsFn);
39  size_t findBoundary(size_t begin, size_t end);
40  void forEachClassRange(size_t begin, size_t end,
41                         llvm::function_ref<void(size_t, size_t)> func);
42  void forEachClass(llvm::function_ref<void(size_t, size_t)> func);
43
44  bool equalsConstant(const ConcatInputSection *ia,
45                      const ConcatInputSection *ib);
46  bool equalsVariable(const ConcatInputSection *ia,
47                      const ConcatInputSection *ib);
48
49  // ICF needs a copy of the inputs vector because its equivalence-class
50  // segregation algorithm destroys the proper sequence.
51  std::vector<ConcatInputSection *> icfInputs;
52
53  unsigned icfPass = 0;
54  std::atomic<bool> icfRepeat{false};
55  std::atomic<uint64_t> equalsConstantCount{0};
56  std::atomic<uint64_t> equalsVariableCount{0};
57};
58
59ICF::ICF(std::vector<ConcatInputSection *> &inputs) {
60  icfInputs.assign(inputs.begin(), inputs.end());
61}
62
63// ICF = Identical Code Folding
64//
65// We only fold __TEXT,__text, so this is really "code" folding, and not
66// "COMDAT" folding. String and scalar constant literals are deduplicated
67// elsewhere.
68//
69// Summary of segments & sections:
70//
71// The __TEXT segment is readonly at the MMU. Some sections are already
72// deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are
73// synthetic and inherently free of duplicates (__TEXT,__stubs &
74// __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const,
75// because doing so induces many test failures.
76//
77// The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
78// thus ineligible for ICF.
79//
80// The __DATA_CONST segment is read/write at the MMU, but is logically const to
81// the application after dyld applies fixups to pointer data. We currently
82// fold only the __DATA_CONST,__cfstring section.
83//
84// The __DATA segment is read/write at the MMU, and as application-writeable
85// data, none of its sections are eligible for ICF.
86//
87// Please see the large block comment in lld/ELF/ICF.cpp for an explanation
88// of the segregation algorithm.
89//
90// FIXME(gkm): implement keep-unique attributes
91// FIXME(gkm): implement address-significance tables for MachO object files
92
93// Compare "non-moving" parts of two ConcatInputSections, namely everything
94// except references to other ConcatInputSections.
95bool ICF::equalsConstant(const ConcatInputSection *ia,
96                         const ConcatInputSection *ib) {
97  if (verboseDiagnostics)
98    ++equalsConstantCount;
99  // We can only fold within the same OutputSection.
100  if (ia->parent != ib->parent)
101    return false;
102  if (ia->data.size() != ib->data.size())
103    return false;
104  if (ia->data != ib->data)
105    return false;
106  if (ia->relocs.size() != ib->relocs.size())
107    return false;
108  auto f = [](const Reloc &ra, const Reloc &rb) {
109    if (ra.type != rb.type)
110      return false;
111    if (ra.pcrel != rb.pcrel)
112      return false;
113    if (ra.length != rb.length)
114      return false;
115    if (ra.offset != rb.offset)
116      return false;
117    if (ra.referent.is<Symbol *>() != rb.referent.is<Symbol *>())
118      return false;
119
120    InputSection *isecA, *isecB;
121
122    uint64_t valueA = 0;
123    uint64_t valueB = 0;
124    if (ra.referent.is<Symbol *>()) {
125      const auto *sa = ra.referent.get<Symbol *>();
126      const auto *sb = rb.referent.get<Symbol *>();
127      if (sa->kind() != sb->kind())
128        return false;
129      // ICF runs before Undefineds are treated (and potentially converted into
130      // DylibSymbols).
131      if (isa<DylibSymbol>(sa) || isa<Undefined>(sa))
132        return sa == sb && ra.addend == rb.addend;
133      assert(isa<Defined>(sa));
134      const auto *da = cast<Defined>(sa);
135      const auto *db = cast<Defined>(sb);
136      if (!da->isec || !db->isec) {
137        assert(da->isAbsolute() && db->isAbsolute());
138        return da->value + ra.addend == db->value + rb.addend;
139      }
140      isecA = da->isec;
141      valueA = da->value;
142      isecB = db->isec;
143      valueB = db->value;
144    } else {
145      isecA = ra.referent.get<InputSection *>();
146      isecB = rb.referent.get<InputSection *>();
147    }
148
149    if (isecA->parent != isecB->parent)
150      return false;
151    // Sections with identical parents should be of the same kind.
152    assert(isecA->kind() == isecB->kind());
153    // We will compare ConcatInputSection contents in equalsVariable.
154    if (isa<ConcatInputSection>(isecA))
155      return ra.addend == rb.addend;
156    // Else we have two literal sections. References to them are equal iff their
157    // offsets in the output section are equal.
158    if (ra.referent.is<Symbol *>())
159      // For symbol relocs, we compare the contents at the symbol address. We
160      // don't do `getOffset(value + addend)` because value + addend may not be
161      // a valid offset in the literal section.
162      return isecA->getOffset(valueA) == isecB->getOffset(valueB) &&
163             ra.addend == rb.addend;
164    else {
165      assert(valueA == 0 && valueB == 0);
166      // For section relocs, we compare the content at the section offset.
167      return isecA->getOffset(ra.addend) == isecB->getOffset(rb.addend);
168    }
169  };
170  return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
171                    f);
172}
173
174// Compare the "moving" parts of two ConcatInputSections -- i.e. everything not
175// handled by equalsConstant().
176bool ICF::equalsVariable(const ConcatInputSection *ia,
177                         const ConcatInputSection *ib) {
178  if (verboseDiagnostics)
179    ++equalsVariableCount;
180  assert(ia->relocs.size() == ib->relocs.size());
181  auto f = [this](const Reloc &ra, const Reloc &rb) {
182    // We already filtered out mismatching values/addends in equalsConstant.
183    if (ra.referent == rb.referent)
184      return true;
185    const ConcatInputSection *isecA, *isecB;
186    if (ra.referent.is<Symbol *>()) {
187      // Matching DylibSymbols are already filtered out by the
188      // identical-referent check above. Non-matching DylibSymbols were filtered
189      // out in equalsConstant(). So we can safely cast to Defined here.
190      const auto *da = cast<Defined>(ra.referent.get<Symbol *>());
191      const auto *db = cast<Defined>(rb.referent.get<Symbol *>());
192      if (da->isAbsolute())
193        return true;
194      isecA = dyn_cast<ConcatInputSection>(da->isec);
195      if (!isecA)
196        return true; // literal sections were checked in equalsConstant.
197      isecB = cast<ConcatInputSection>(db->isec);
198    } else {
199      const auto *sa = ra.referent.get<InputSection *>();
200      const auto *sb = rb.referent.get<InputSection *>();
201      isecA = dyn_cast<ConcatInputSection>(sa);
202      if (!isecA)
203        return true;
204      isecB = cast<ConcatInputSection>(sb);
205    }
206    return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2];
207  };
208  if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f))
209    return false;
210
211  // If there are symbols with associated unwind info, check that the unwind
212  // info matches. For simplicity, we only handle the case where there are only
213  // symbols at offset zero within the section (which is typically the case with
214  // .subsections_via_symbols.)
215  auto hasUnwind = [](Defined *d) { return d->unwindEntry != nullptr; };
216  const auto *itA = llvm::find_if(ia->symbols, hasUnwind);
217  const auto *itB = llvm::find_if(ib->symbols, hasUnwind);
218  if (itA == ia->symbols.end())
219    return itB == ib->symbols.end();
220  if (itB == ib->symbols.end())
221    return false;
222  const Defined *da = *itA;
223  const Defined *db = *itB;
224  if (da->unwindEntry->icfEqClass[icfPass % 2] !=
225          db->unwindEntry->icfEqClass[icfPass % 2] ||
226      da->value != 0 || db->value != 0)
227    return false;
228  auto isZero = [](Defined *d) { return d->value == 0; };
229  return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) ==
230             ia->symbols.end() &&
231         std::find_if_not(std::next(itB), ib->symbols.end(), isZero) ==
232             ib->symbols.end();
233}
234
235// Find the first InputSection after BEGIN whose equivalence class differs
236size_t ICF::findBoundary(size_t begin, size_t end) {
237  uint64_t beginHash = icfInputs[begin]->icfEqClass[icfPass % 2];
238  for (size_t i = begin + 1; i < end; ++i)
239    if (beginHash != icfInputs[i]->icfEqClass[icfPass % 2])
240      return i;
241  return end;
242}
243
244// Invoke FUNC on subranges with matching equivalence class
245void ICF::forEachClassRange(size_t begin, size_t end,
246                            llvm::function_ref<void(size_t, size_t)> func) {
247  while (begin < end) {
248    size_t mid = findBoundary(begin, end);
249    func(begin, mid);
250    begin = mid;
251  }
252}
253
254// Split icfInputs into shards, then parallelize invocation of FUNC on subranges
255// with matching equivalence class
256void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) {
257  // Only use threads when the benefits outweigh the overhead.
258  const size_t threadingThreshold = 1024;
259  if (icfInputs.size() < threadingThreshold) {
260    forEachClassRange(0, icfInputs.size(), func);
261    ++icfPass;
262    return;
263  }
264
265  // Shard into non-overlapping intervals, and call FUNC in parallel.  The
266  // sharding must be completed before any calls to FUNC are made so that FUNC
267  // can modify the InputSection in its shard without causing data races.
268  const size_t shards = 256;
269  size_t step = icfInputs.size() / shards;
270  size_t boundaries[shards + 1];
271  boundaries[0] = 0;
272  boundaries[shards] = icfInputs.size();
273  parallelFor(1, shards, [&](size_t i) {
274    boundaries[i] = findBoundary((i - 1) * step, icfInputs.size());
275  });
276  parallelFor(1, shards + 1, [&](size_t i) {
277    if (boundaries[i - 1] < boundaries[i]) {
278      forEachClassRange(boundaries[i - 1], boundaries[i], func);
279    }
280  });
281  ++icfPass;
282}
283
284void ICF::run() {
285  // Into each origin-section hash, combine all reloc referent section hashes.
286  for (icfPass = 0; icfPass < 2; ++icfPass) {
287    parallelForEach(icfInputs, [&](ConcatInputSection *isec) {
288      uint32_t hash = isec->icfEqClass[icfPass % 2];
289      for (const Reloc &r : isec->relocs) {
290        if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
291          if (auto *defined = dyn_cast<Defined>(sym)) {
292            if (defined->isec) {
293              if (auto *referentIsec =
294                      dyn_cast<ConcatInputSection>(defined->isec))
295                hash += defined->value + referentIsec->icfEqClass[icfPass % 2];
296              else
297                hash += defined->isec->kind() +
298                        defined->isec->getOffset(defined->value);
299            } else {
300              hash += defined->value;
301            }
302          } else {
303            // ICF runs before Undefined diags
304            assert(isa<Undefined>(sym) || isa<DylibSymbol>(sym));
305          }
306        }
307      }
308      // Set MSB to 1 to avoid collisions with non-hashed classes.
309      isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 31);
310    });
311  }
312
313  llvm::stable_sort(
314      icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) {
315        return a->icfEqClass[0] < b->icfEqClass[0];
316      });
317  forEachClass([&](size_t begin, size_t end) {
318    segregate(begin, end, &ICF::equalsConstant);
319  });
320
321  // Split equivalence groups by comparing relocations until convergence
322  do {
323    icfRepeat = false;
324    forEachClass([&](size_t begin, size_t end) {
325      segregate(begin, end, &ICF::equalsVariable);
326    });
327  } while (icfRepeat);
328  log("ICF needed " + Twine(icfPass) + " iterations");
329  if (verboseDiagnostics) {
330    log("equalsConstant() called " + Twine(equalsConstantCount) + " times");
331    log("equalsVariable() called " + Twine(equalsVariableCount) + " times");
332  }
333
334  // Fold sections within equivalence classes
335  forEachClass([&](size_t begin, size_t end) {
336    if (end - begin < 2)
337      return;
338    ConcatInputSection *beginIsec = icfInputs[begin];
339    for (size_t i = begin + 1; i < end; ++i)
340      beginIsec->foldIdentical(icfInputs[i]);
341  });
342}
343
344// Split an equivalence class into smaller classes.
345void ICF::segregate(size_t begin, size_t end, EqualsFn equals) {
346  while (begin < end) {
347    // Divide [begin, end) into two. Let mid be the start index of the
348    // second group.
349    auto bound = std::stable_partition(
350        icfInputs.begin() + begin + 1, icfInputs.begin() + end,
351        [&](ConcatInputSection *isec) {
352          return (this->*equals)(icfInputs[begin], isec);
353        });
354    size_t mid = bound - icfInputs.begin();
355
356    // Split [begin, end) into [begin, mid) and [mid, end). We use mid as an
357    // equivalence class ID because every group ends with a unique index.
358    for (size_t i = begin; i < mid; ++i)
359      icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = mid;
360
361    // If we created a group, we need to iterate the main loop again.
362    if (mid != end)
363      icfRepeat = true;
364
365    begin = mid;
366  }
367}
368
369void macho::markSymAsAddrSig(Symbol *s) {
370  if (auto *d = dyn_cast_or_null<Defined>(s))
371    if (d->isec)
372      d->isec->keepUnique = true;
373}
374
375void macho::markAddrSigSymbols() {
376  TimeTraceScope timeScope("Mark addrsig symbols");
377  for (InputFile *file : inputFiles) {
378    ObjFile *obj = dyn_cast<ObjFile>(file);
379    if (!obj)
380      continue;
381
382    Section *addrSigSection = obj->addrSigSection;
383    if (!addrSigSection)
384      continue;
385    assert(addrSigSection->subsections.size() == 1);
386
387    const InputSection *isec = addrSigSection->subsections[0].isec;
388
389    for (const Reloc &r : isec->relocs) {
390      if (auto *sym = r.referent.dyn_cast<Symbol *>())
391        markSymAsAddrSig(sym);
392      else
393        error(toString(isec) + ": unexpected section relocation");
394    }
395  }
396}
397
398void macho::foldIdenticalSections(bool onlyCfStrings) {
399  TimeTraceScope timeScope("Fold Identical Code Sections");
400  // The ICF equivalence-class segregation algorithm relies on pre-computed
401  // hashes of InputSection::data for the ConcatOutputSection::inputs and all
402  // sections referenced by their relocs. We could recursively traverse the
403  // relocs to find every referenced InputSection, but that precludes easy
404  // parallelization. Therefore, we hash every InputSection here where we have
405  // them all accessible as simple vectors.
406
407  // If an InputSection is ineligible for ICF, we give it a unique ID to force
408  // it into an unfoldable singleton equivalence class.  Begin the unique-ID
409  // space at inputSections.size(), so that it will never intersect with
410  // equivalence-class IDs which begin at 0. Since hashes & unique IDs never
411  // coexist with equivalence-class IDs, this is not necessary, but might help
412  // someone keep the numbers straight in case we ever need to debug the
413  // ICF::segregate()
414  std::vector<ConcatInputSection *> foldable;
415  uint64_t icfUniqueID = inputSections.size();
416  for (ConcatInputSection *isec : inputSections) {
417    bool isFoldableWithAddendsRemoved = isCfStringSection(isec) ||
418                                        isClassRefsSection(isec) ||
419                                        isSelRefsSection(isec);
420    // NOTE: __objc_selrefs is typically marked as no_dead_strip by MC, but we
421    // can still fold it.
422    bool hasFoldableFlags = (isSelRefsSection(isec) ||
423                             sectionType(isec->getFlags()) == MachO::S_REGULAR);
424    // FIXME: consider non-code __text sections as foldable?
425    bool isFoldable = (!onlyCfStrings || isCfStringSection(isec)) &&
426                      (isCodeSection(isec) || isFoldableWithAddendsRemoved ||
427                       isGccExceptTabSection(isec)) &&
428                      !isec->keepUnique && !isec->hasAltEntry &&
429                      !isec->shouldOmitFromOutput() && hasFoldableFlags;
430    if (isFoldable) {
431      foldable.push_back(isec);
432      for (Defined *d : isec->symbols)
433        if (d->unwindEntry)
434          foldable.push_back(d->unwindEntry);
435
436      // Some sections have embedded addends that foil ICF's hashing / equality
437      // checks. (We can ignore embedded addends when doing ICF because the same
438      // information gets recorded in our Reloc structs.) We therefore create a
439      // mutable copy of the section data and zero out the embedded addends
440      // before performing any hashing / equality checks.
441      if (isFoldableWithAddendsRemoved) {
442        // We have to do this copying serially as the BumpPtrAllocator is not
443        // thread-safe. FIXME: Make a thread-safe allocator.
444        MutableArrayRef<uint8_t> copy = isec->data.copy(bAlloc());
445        for (const Reloc &r : isec->relocs)
446          target->relocateOne(copy.data() + r.offset, r, /*va=*/0,
447                              /*relocVA=*/0);
448        isec->data = copy;
449      }
450    } else if (!isEhFrameSection(isec)) {
451      // EH frames are gathered as foldables from unwindEntry above; give a
452      // unique ID to everything else.
453      isec->icfEqClass[0] = ++icfUniqueID;
454    }
455  }
456  parallelForEach(foldable, [](ConcatInputSection *isec) {
457    assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID!
458    // Turn-on the top bit to guarantee that valid hashes have no collisions
459    // with the small-integer unique IDs for ICF-ineligible sections
460    isec->icfEqClass[0] = xxh3_64bits(isec->data) | (1ull << 31);
461  });
462  // Now that every input section is either hashed or marked as unique, run the
463  // segregation algorithm to detect foldable subsections.
464  ICF(foldable).run();
465}
466