1//===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9///
10/// \file Converts from in-memory normalized mach-o to in-memory Atoms.
11///
12///                  +------------+
13///                  | normalized |
14///                  +------------+
15///                        |
16///                        |
17///                        v
18///                    +-------+
19///                    | Atoms |
20///                    +-------+
21
22#include "ArchHandler.h"
23#include "Atoms.h"
24#include "File.h"
25#include "MachONormalizedFile.h"
26#include "MachONormalizedFileBinaryUtils.h"
27#include "lld/Common/LLVM.h"
28#include "lld/Core/Error.h"
29#include "llvm/BinaryFormat/Dwarf.h"
30#include "llvm/BinaryFormat/MachO.h"
31#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
32#include "llvm/Support/DataExtractor.h"
33#include "llvm/Support/Debug.h"
34#include "llvm/Support/Error.h"
35#include "llvm/Support/Format.h"
36#include "llvm/Support/LEB128.h"
37#include "llvm/Support/raw_ostream.h"
38
39using namespace llvm::MachO;
40using namespace lld::mach_o::normalized;
41
42#define DEBUG_TYPE "normalized-file-to-atoms"
43
44namespace lld {
45namespace mach_o {
46
47
48namespace { // anonymous
49
50
51#define ENTRY(seg, sect, type, atomType) \
52  {seg, sect, type, DefinedAtom::atomType }
53
54struct MachORelocatableSectionToAtomType {
55  StringRef                 segmentName;
56  StringRef                 sectionName;
57  SectionType               sectionType;
58  DefinedAtom::ContentType  atomType;
59};
60
61const MachORelocatableSectionToAtomType sectsToAtomType[] = {
62  ENTRY("__TEXT", "__text",           S_REGULAR,          typeCode),
63  ENTRY("__TEXT", "__text",           S_REGULAR,          typeResolver),
64  ENTRY("__TEXT", "__cstring",        S_CSTRING_LITERALS, typeCString),
65  ENTRY("",       "",                 S_CSTRING_LITERALS, typeCString),
66  ENTRY("__TEXT", "__ustring",        S_REGULAR,          typeUTF16String),
67  ENTRY("__TEXT", "__const",          S_REGULAR,          typeConstant),
68  ENTRY("__TEXT", "__const_coal",     S_COALESCED,        typeConstant),
69  ENTRY("__TEXT", "__eh_frame",       S_COALESCED,        typeCFI),
70  ENTRY("__TEXT", "__eh_frame",       S_REGULAR,          typeCFI),
71  ENTRY("__TEXT", "__literal4",       S_4BYTE_LITERALS,   typeLiteral4),
72  ENTRY("__TEXT", "__literal8",       S_8BYTE_LITERALS,   typeLiteral8),
73  ENTRY("__TEXT", "__literal16",      S_16BYTE_LITERALS,  typeLiteral16),
74  ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR,          typeLSDA),
75  ENTRY("__DATA", "__data",           S_REGULAR,          typeData),
76  ENTRY("__DATA", "__datacoal_nt",    S_COALESCED,        typeData),
77  ENTRY("__DATA", "__const",          S_REGULAR,          typeConstData),
78  ENTRY("__DATA", "__cfstring",       S_REGULAR,          typeCFString),
79  ENTRY("__DATA", "__mod_init_func",  S_MOD_INIT_FUNC_POINTERS,
80                                                          typeInitializerPtr),
81  ENTRY("__DATA", "__mod_term_func",  S_MOD_TERM_FUNC_POINTERS,
82                                                          typeTerminatorPtr),
83  ENTRY("__DATA", "__got",            S_NON_LAZY_SYMBOL_POINTERS,
84                                                          typeGOT),
85  ENTRY("__DATA", "__bss",            S_ZEROFILL,         typeZeroFill),
86  ENTRY("",       "",                 S_NON_LAZY_SYMBOL_POINTERS,
87                                                          typeGOT),
88  ENTRY("__DATA", "__interposing",    S_INTERPOSING,      typeInterposingTuples),
89  ENTRY("__DATA", "__thread_vars",    S_THREAD_LOCAL_VARIABLES,
90                                                          typeThunkTLV),
91  ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData),
92  ENTRY("__DATA", "__thread_bss",     S_THREAD_LOCAL_ZEROFILL,
93                                                        typeTLVInitialZeroFill),
94  ENTRY("__DATA", "__objc_imageinfo", S_REGULAR,          typeObjCImageInfo),
95  ENTRY("__DATA", "__objc_catlist",   S_REGULAR,          typeObjC2CategoryList),
96  ENTRY("",       "",                 S_INTERPOSING,      typeInterposingTuples),
97  ENTRY("__LD",   "__compact_unwind", S_REGULAR,
98                                                         typeCompactUnwindInfo),
99  ENTRY("",       "",                 S_REGULAR,          typeUnknown)
100};
101#undef ENTRY
102
103
104/// Figures out ContentType of a mach-o section.
105DefinedAtom::ContentType atomTypeFromSection(const Section &section,
106                                             bool &customSectionName) {
107  // First look for match of name and type. Empty names in table are wildcards.
108  customSectionName = false;
109  for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
110                                 p->atomType != DefinedAtom::typeUnknown; ++p) {
111    if (p->sectionType != section.type)
112      continue;
113    if (!p->segmentName.equals(section.segmentName) && !p->segmentName.empty())
114      continue;
115    if (!p->sectionName.equals(section.sectionName) && !p->sectionName.empty())
116      continue;
117    customSectionName = p->segmentName.empty() && p->sectionName.empty();
118    return p->atomType;
119  }
120  // Look for code denoted by section attributes
121  if (section.attributes & S_ATTR_PURE_INSTRUCTIONS)
122    return DefinedAtom::typeCode;
123
124  return DefinedAtom::typeUnknown;
125}
126
/// Strategy used to split the content of a section into atoms.
enum AtomizeModel {
  /// One atom per symbol defined in the section.
  atomizeAtSymbols,
  /// Atoms of a fixed size taken from the section's parse info.
  atomizeFixedSize,
  /// Atoms the size of a pointer (4 or 8 bytes).
  atomizePointerSize,
  /// Zero terminated c-strings.
  atomizeUTF8,
  /// Zero terminated UTF16 strings.
  atomizeUTF16,
  /// Dwarf unwind CFIs (FDE or CIE).
  atomizeCFI,
  /// Compact unwind entries.
  atomizeCU,
  /// NS/CFString objects.
  atomizeCFString,
};
137
138/// Returns info on how to atomize a section of the specified ContentType.
139void sectionParseInfo(DefinedAtom::ContentType atomType,
140                      unsigned int &sizeMultiple,
141                      DefinedAtom::Scope &scope,
142                      DefinedAtom::Merge &merge,
143                      AtomizeModel &atomizeModel) {
144  struct ParseInfo {
145    DefinedAtom::ContentType  atomType;
146    unsigned int              sizeMultiple;
147    DefinedAtom::Scope        scope;
148    DefinedAtom::Merge        merge;
149    AtomizeModel              atomizeModel;
150  };
151
152  #define ENTRY(type, size, scope, merge, model) \
153    {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model }
154
155  static const ParseInfo parseInfo[] = {
156    ENTRY(typeCode,              1, scopeGlobal,          mergeNo,
157                                                            atomizeAtSymbols),
158    ENTRY(typeData,              1, scopeGlobal,          mergeNo,
159                                                            atomizeAtSymbols),
160    ENTRY(typeConstData,         1, scopeGlobal,          mergeNo,
161                                                            atomizeAtSymbols),
162    ENTRY(typeZeroFill,          1, scopeGlobal,          mergeNo,
163                                                            atomizeAtSymbols),
164    ENTRY(typeConstant,          1, scopeGlobal,          mergeNo,
165                                                            atomizeAtSymbols),
166    ENTRY(typeCString,           1, scopeLinkageUnit,     mergeByContent,
167                                                            atomizeUTF8),
168    ENTRY(typeUTF16String,       1, scopeLinkageUnit,     mergeByContent,
169                                                            atomizeUTF16),
170    ENTRY(typeCFI,               4, scopeTranslationUnit, mergeNo,
171                                                            atomizeCFI),
172    ENTRY(typeLiteral4,          4, scopeLinkageUnit,     mergeByContent,
173                                                            atomizeFixedSize),
174    ENTRY(typeLiteral8,          8, scopeLinkageUnit,     mergeByContent,
175                                                            atomizeFixedSize),
176    ENTRY(typeLiteral16,        16, scopeLinkageUnit,     mergeByContent,
177                                                            atomizeFixedSize),
178    ENTRY(typeCFString,          4, scopeLinkageUnit,     mergeByContent,
179                                                            atomizeCFString),
180    ENTRY(typeInitializerPtr,    4, scopeTranslationUnit, mergeNo,
181                                                            atomizePointerSize),
182    ENTRY(typeTerminatorPtr,     4, scopeTranslationUnit, mergeNo,
183                                                            atomizePointerSize),
184    ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo,
185                                                            atomizeCU),
186    ENTRY(typeGOT,               4, scopeLinkageUnit,     mergeByContent,
187                                                            atomizePointerSize),
188    ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent,
189                                                            atomizePointerSize),
190    ENTRY(typeUnknown,           1, scopeGlobal,          mergeNo,
191                                                            atomizeAtSymbols)
192  };
193  #undef ENTRY
194  const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo);
195  for (int i=0; i < tableLen; ++i) {
196    if (parseInfo[i].atomType == atomType) {
197      sizeMultiple = parseInfo[i].sizeMultiple;
198      scope        = parseInfo[i].scope;
199      merge        = parseInfo[i].merge;
200      atomizeModel = parseInfo[i].atomizeModel;
201      return;
202    }
203  }
204
205  // Unknown type is atomized by symbols.
206  sizeMultiple = 1;
207  scope = DefinedAtom::scopeGlobal;
208  merge = DefinedAtom::mergeNo;
209  atomizeModel = atomizeAtSymbols;
210}
211
212
213Atom::Scope atomScope(uint8_t scope) {
214  switch (scope) {
215  case N_EXT:
216    return Atom::scopeGlobal;
217  case N_PEXT:
218  case N_PEXT | N_EXT:
219    return Atom::scopeLinkageUnit;
220  case 0:
221    return Atom::scopeTranslationUnit;
222  }
223  llvm_unreachable("unknown scope value!");
224}
225
226void appendSymbolsInSection(const std::vector<Symbol> &inSymbols,
227                            uint32_t sectionIndex,
228                            SmallVector<const Symbol *, 64> &outSyms) {
229  for (const Symbol &sym : inSymbols) {
230    // Only look at definition symbols.
231    if ((sym.type & N_TYPE) != N_SECT)
232      continue;
233    if (sym.sect != sectionIndex)
234      continue;
235    outSyms.push_back(&sym);
236  }
237}
238
239void atomFromSymbol(DefinedAtom::ContentType atomType, const Section &section,
240                    MachOFile &file, uint64_t symbolAddr, StringRef symbolName,
241                    uint16_t symbolDescFlags, Atom::Scope symbolScope,
242                    uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) {
243  // Mach-O symbol table does have size in it. Instead the size is the
244  // difference between this and the next symbol.
245  uint64_t size = nextSymbolAddr - symbolAddr;
246  uint64_t offset = symbolAddr - section.address;
247  bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable;
248  if (isZeroFillSection(section.type)) {
249    file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size,
250                                noDeadStrip, copyRefs, &section);
251  } else {
252    DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF)
253                              ? DefinedAtom::mergeAsWeak : DefinedAtom::mergeNo;
254    bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF);
255    if (atomType == DefinedAtom::typeUnknown) {
256      // Mach-O needs a segment and section name.  Concatenate those two
257      // with a / separator (e.g. "seg/sect") to fit into the lld model
258      // of just a section name.
259      std::string segSectName = section.segmentName.str()
260                                + "/" + section.sectionName.str();
261      file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType,
262                                         merge, thumb, noDeadStrip, offset,
263                                         size, segSectName, true, &section);
264    } else {
265      if ((atomType == lld::DefinedAtom::typeCode) &&
266          (symbolDescFlags & N_SYMBOL_RESOLVER)) {
267        atomType = lld::DefinedAtom::typeResolver;
268      }
269      file.addDefinedAtom(symbolName, symbolScope, atomType, merge,
270                          offset, size, thumb, noDeadStrip, copyRefs, &section);
271    }
272  }
273}
274
275llvm::Error processSymboledSection(DefinedAtom::ContentType atomType,
276                                   const Section &section,
277                                   const NormalizedFile &normalizedFile,
278                                   MachOFile &file, bool scatterable,
279                                   bool copyRefs) {
280  // Find section's index.
281  uint32_t sectIndex = 1;
282  for (auto &sect : normalizedFile.sections) {
283    if (&sect == &section)
284      break;
285    ++sectIndex;
286  }
287
288  // Find all symbols in this section.
289  SmallVector<const Symbol *, 64> symbols;
290  appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols);
291  appendSymbolsInSection(normalizedFile.localSymbols,  sectIndex, symbols);
292
293  // Sort symbols.
294  std::sort(symbols.begin(), symbols.end(),
295            [](const Symbol *lhs, const Symbol *rhs) -> bool {
296              if (lhs == rhs)
297                return false;
298              // First by address.
299              uint64_t lhsAddr = lhs->value;
300              uint64_t rhsAddr = rhs->value;
301              if (lhsAddr != rhsAddr)
302                return lhsAddr < rhsAddr;
303               // If same address, one is an alias so sort by scope.
304              Atom::Scope lScope = atomScope(lhs->scope);
305              Atom::Scope rScope = atomScope(rhs->scope);
306              if (lScope != rScope)
307                return lScope < rScope;
308              // If same address and scope, see if one might be better as
309              // the alias.
310              bool lPrivate = (lhs->name.front() == 'l');
311              bool rPrivate = (rhs->name.front() == 'l');
312              if (lPrivate != rPrivate)
313                return lPrivate;
314              // If same address and scope, sort by name.
315              return lhs->name < rhs->name;
316            });
317
318  // Debug logging of symbols.
319  // for (const Symbol *sym : symbols)
320  //  llvm::errs() << "  sym: "
321  //    << llvm::format("0x%08llx ", (uint64_t)sym->value)
322  //    << ", " << sym->name << "\n";
323
324  // If section has no symbols and no content, there are no atoms.
325  if (symbols.empty() && section.content.empty())
326    return llvm::Error::success();
327
328  if (symbols.empty()) {
329    // Section has no symbols, put all content in one anonymous atom.
330    atomFromSymbol(atomType, section, file, section.address, StringRef(),
331                  0, Atom::scopeTranslationUnit,
332                  section.address + section.content.size(),
333                  scatterable, copyRefs);
334  }
335  else if (symbols.front()->value != section.address) {
336    // Section has anonymous content before first symbol.
337    atomFromSymbol(atomType, section, file, section.address, StringRef(),
338                   0, Atom::scopeTranslationUnit, symbols.front()->value,
339                   scatterable, copyRefs);
340  }
341
342  const Symbol *lastSym = nullptr;
343  for (const Symbol *sym : symbols) {
344    if (lastSym != nullptr) {
345      // Ignore any assembler added "ltmpNNN" symbol at start of section
346      // if there is another symbol at the start.
347      if ((lastSym->value != sym->value)
348          || lastSym->value != section.address
349          || !lastSym->name.startswith("ltmp")) {
350        atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
351                       lastSym->desc, atomScope(lastSym->scope), sym->value,
352                       scatterable, copyRefs);
353      }
354    }
355    lastSym = sym;
356  }
357  if (lastSym != nullptr) {
358    atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
359                   lastSym->desc, atomScope(lastSym->scope),
360                   section.address + section.content.size(),
361                   scatterable, copyRefs);
362  }
363
364  // If object built without .subsections_via_symbols, add reference chain.
365  if (!scatterable) {
366    MachODefinedAtom *prevAtom = nullptr;
367    file.eachAtomInSection(section,
368                           [&](MachODefinedAtom *atom, uint64_t offset)->void {
369      if (prevAtom)
370        prevAtom->addReference(Reference::KindNamespace::all,
371                               Reference::KindArch::all,
372                               Reference::kindLayoutAfter, 0, atom, 0);
373      prevAtom = atom;
374    });
375  }
376
377  return llvm::Error::success();
378}
379
380llvm::Error processSection(DefinedAtom::ContentType atomType,
381                           const Section &section,
382                           bool customSectionName,
383                           const NormalizedFile &normalizedFile,
384                           MachOFile &file, bool scatterable,
385                           bool copyRefs) {
386  const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
387  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
388
389  // Get info on how to atomize section.
390  unsigned int       sizeMultiple;
391  DefinedAtom::Scope scope;
392  DefinedAtom::Merge merge;
393  AtomizeModel       atomizeModel;
394  sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel);
395
396  // Validate section size.
397  if ((section.content.size() % sizeMultiple) != 0)
398    return llvm::make_error<GenericError>(Twine("Section ")
399                                          + section.segmentName
400                                          + "/" + section.sectionName
401                                          + " has size ("
402                                          + Twine(section.content.size())
403                                          + ") which is not a multiple of "
404                                          + Twine(sizeMultiple));
405
406  if (atomizeModel == atomizeAtSymbols) {
407    // Break section up into atoms each with a fixed size.
408    return processSymboledSection(atomType, section, normalizedFile, file,
409                                  scatterable, copyRefs);
410  } else {
411    unsigned int size;
412    for (unsigned int offset = 0, e = section.content.size(); offset != e;) {
413      switch (atomizeModel) {
414      case atomizeFixedSize:
415        // Break section up into atoms each with a fixed size.
416        size = sizeMultiple;
417        break;
418      case atomizePointerSize:
419        // Break section up into atoms each the size of a pointer.
420        size = is64 ? 8 : 4;
421        break;
422      case atomizeUTF8:
423        // Break section up into zero terminated c-strings.
424        size = 0;
425        for (unsigned int i = offset; i < e; ++i) {
426          if (section.content[i] == 0) {
427            size = i + 1 - offset;
428            break;
429          }
430        }
431        break;
432      case atomizeUTF16:
433        // Break section up into zero terminated UTF16 strings.
434        size = 0;
435        for (unsigned int i = offset; i < e; i += 2) {
436          if ((section.content[i] == 0) && (section.content[i + 1] == 0)) {
437            size = i + 2 - offset;
438            break;
439          }
440        }
441        break;
442      case atomizeCFI:
443        // Break section up into dwarf unwind CFIs (FDE or CIE).
444        size = read32(&section.content[offset], isBig) + 4;
445        if (offset+size > section.content.size()) {
446          return llvm::make_error<GenericError>(Twine("Section ")
447                                                + section.segmentName
448                                                + "/" + section.sectionName
449                                                + " is malformed.  Size of CFI "
450                                                "starting at offset ("
451                                                + Twine(offset)
452                                                + ") is past end of section.");
453        }
454        break;
455      case atomizeCU:
456        // Break section up into compact unwind entries.
457        size = is64 ? 32 : 20;
458        break;
459      case atomizeCFString:
460        // Break section up into NS/CFString objects.
461        size = is64 ? 32 : 16;
462        break;
463      case atomizeAtSymbols:
464        break;
465      }
466      if (size == 0) {
467        return llvm::make_error<GenericError>(Twine("Section ")
468                                              + section.segmentName
469                                              + "/" + section.sectionName
470                                              + " is malformed.  The last atom "
471                                              "is not zero terminated.");
472      }
473      if (customSectionName) {
474        // Mach-O needs a segment and section name.  Concatenate those two
475        // with a / separator (e.g. "seg/sect") to fit into the lld model
476        // of just a section name.
477        std::string segSectName = section.segmentName.str()
478                                  + "/" + section.sectionName.str();
479        file.addDefinedAtomInCustomSection(StringRef(), scope, atomType,
480                                           merge, false, false, offset,
481                                           size, segSectName, true, &section);
482      } else {
483        file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size,
484                            false, false, copyRefs, &section);
485      }
486      offset += size;
487    }
488  }
489  return llvm::Error::success();
490}
491
492const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile,
493                                          uint64_t address) {
494  for (const Section &s : normalizedFile.sections) {
495    uint64_t sAddr = s.address;
496    if ((sAddr <= address) && (address < sAddr+s.content.size())) {
497      return &s;
498    }
499  }
500  return nullptr;
501}
502
503const MachODefinedAtom *
504findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file,
505                        uint64_t addr, Reference::Addend &addend) {
506  const Section *sect = nullptr;
507  sect = findSectionCoveringAddress(normalizedFile, addr);
508  if (!sect)
509    return nullptr;
510
511  uint32_t offsetInTarget;
512  uint64_t offsetInSect = addr - sect->address;
513  auto atom =
514      file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
515  addend = offsetInTarget;
516  return atom;
517}
518
519// Walks all relocations for a section in a normalized .o file and
520// creates corresponding lld::Reference objects.
521llvm::Error convertRelocs(const Section &section,
522                          const NormalizedFile &normalizedFile,
523                          bool scatterable,
524                          MachOFile &file,
525                          ArchHandler &handler) {
526  // Utility function for ArchHandler to find atom by its address.
527  auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr,
528                         const lld::Atom **atom, Reference::Addend *addend)
529                         -> llvm::Error {
530    if (sectIndex > normalizedFile.sections.size())
531      return llvm::make_error<GenericError>(Twine("out of range section "
532                                     "index (") + Twine(sectIndex) + ")");
533    const Section *sect = nullptr;
534    if (sectIndex == 0) {
535      sect = findSectionCoveringAddress(normalizedFile, addr);
536      if (!sect)
537        return llvm::make_error<GenericError>(Twine("address (" + Twine(addr)
538                                       + ") is not in any section"));
539    } else {
540      sect = &normalizedFile.sections[sectIndex-1];
541    }
542    uint32_t offsetInTarget;
543    uint64_t offsetInSect = addr - sect->address;
544    *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
545    *addend = offsetInTarget;
546    return llvm::Error::success();
547  };
548
549  // Utility function for ArchHandler to find atom by its symbol index.
550  auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result)
551                           -> llvm::Error {
552    // Find symbol from index.
553    const Symbol *sym = nullptr;
554    uint32_t numStabs  = normalizedFile.stabsSymbols.size();
555    uint32_t numLocal  = normalizedFile.localSymbols.size();
556    uint32_t numGlobal = normalizedFile.globalSymbols.size();
557    uint32_t numUndef  = normalizedFile.undefinedSymbols.size();
558    assert(symbolIndex >= numStabs && "Searched for stab via atomBySymbol?");
559    if (symbolIndex < numStabs+numLocal) {
560      sym = &normalizedFile.localSymbols[symbolIndex-numStabs];
561    } else if (symbolIndex < numStabs+numLocal+numGlobal) {
562      sym = &normalizedFile.globalSymbols[symbolIndex-numStabs-numLocal];
563    } else if (symbolIndex < numStabs+numLocal+numGlobal+numUndef) {
564      sym = &normalizedFile.undefinedSymbols[symbolIndex-numStabs-numLocal-
565                                             numGlobal];
566    } else {
567      return llvm::make_error<GenericError>(Twine("symbol index (")
568                                     + Twine(symbolIndex) + ") out of range");
569    }
570
571    // Find atom from symbol.
572    if ((sym->type & N_TYPE) == N_SECT) {
573      if (sym->sect > normalizedFile.sections.size())
574        return llvm::make_error<GenericError>(Twine("symbol section index (")
575                                        + Twine(sym->sect) + ") out of range ");
576      const Section &symSection = normalizedFile.sections[sym->sect-1];
577      uint64_t targetOffsetInSect = sym->value - symSection.address;
578      MachODefinedAtom *target = file.findAtomCoveringAddress(symSection,
579                                                            targetOffsetInSect);
580      if (target) {
581        *result = target;
582        return llvm::Error::success();
583      }
584      return llvm::make_error<GenericError>("no atom found for defined symbol");
585    } else if ((sym->type & N_TYPE) == N_UNDF) {
586      const lld::Atom *target = file.findUndefAtom(sym->name);
587      if (target) {
588        *result = target;
589        return llvm::Error::success();
590      }
591      return llvm::make_error<GenericError>("no undefined atom found for sym");
592    } else {
593      // Search undefs
594      return llvm::make_error<GenericError>("no atom found for symbol");
595    }
596  };
597
598  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
599  // Use old-school iterator so that paired relocations can be grouped.
600  for (auto it=section.relocations.begin(), e=section.relocations.end();
601                                                                it != e; ++it) {
602    const Relocation &reloc = *it;
603    // Find atom this relocation is in.
604    if (reloc.offset > section.content.size())
605      return llvm::make_error<GenericError>(
606                                    Twine("r_address (") + Twine(reloc.offset)
607                                    + ") is larger than section size ("
608                                    + Twine(section.content.size()) + ")");
609    uint32_t offsetInAtom;
610    MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section,
611                                                            reloc.offset,
612                                                            &offsetInAtom);
613    assert(inAtom && "r_address in range, should have found atom");
614    uint64_t fixupAddress = section.address + reloc.offset;
615
616    const lld::Atom *target = nullptr;
617    Reference::Addend addend = 0;
618    Reference::KindValue kind;
619    if (handler.isPairedReloc(reloc)) {
620      // Handle paired relocations together.
621      const Relocation &reloc2 = *++it;
622      auto relocErr = handler.getPairReferenceInfo(
623          reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable,
624          atomByAddr, atomBySymbol, &kind, &target, &addend);
625      if (relocErr) {
626        return handleErrors(std::move(relocErr),
627                            [&](std::unique_ptr<GenericError> GE) {
628          return llvm::make_error<GenericError>(
629            Twine("bad relocation (") + GE->getMessage()
630             + ") in section "
631             + section.segmentName + "/" + section.sectionName
632             + " (r1_address=" + Twine::utohexstr(reloc.offset)
633             + ", r1_type=" + Twine(reloc.type)
634             + ", r1_extern=" + Twine(reloc.isExtern)
635             + ", r1_length=" + Twine((int)reloc.length)
636             + ", r1_pcrel=" + Twine(reloc.pcRel)
637             + (!reloc.scattered ? (Twine(", r1_symbolnum=")
638                                    + Twine(reloc.symbol))
639                                 : (Twine(", r1_scattered=1, r1_value=")
640                                    + Twine(reloc.value)))
641             + ")"
642             + ", (r2_address=" + Twine::utohexstr(reloc2.offset)
643             + ", r2_type=" + Twine(reloc2.type)
644             + ", r2_extern=" + Twine(reloc2.isExtern)
645             + ", r2_length=" + Twine((int)reloc2.length)
646             + ", r2_pcrel=" + Twine(reloc2.pcRel)
647             + (!reloc2.scattered ? (Twine(", r2_symbolnum=")
648                                     + Twine(reloc2.symbol))
649                                  : (Twine(", r2_scattered=1, r2_value=")
650                                     + Twine(reloc2.value)))
651             + ")" );
652          });
653      }
654    }
655    else {
656      // Use ArchHandler to convert relocation record into information
657      // needed to instantiate an lld::Reference object.
658      auto relocErr = handler.getReferenceInfo(
659          reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr,
660          atomBySymbol, &kind, &target, &addend);
661      if (relocErr) {
662        return handleErrors(std::move(relocErr),
663                            [&](std::unique_ptr<GenericError> GE) {
664          return llvm::make_error<GenericError>(
665            Twine("bad relocation (") + GE->getMessage()
666             + ") in section "
667             + section.segmentName + "/" + section.sectionName
668             + " (r_address=" + Twine::utohexstr(reloc.offset)
669             + ", r_type=" + Twine(reloc.type)
670             + ", r_extern=" + Twine(reloc.isExtern)
671             + ", r_length=" + Twine((int)reloc.length)
672             + ", r_pcrel=" + Twine(reloc.pcRel)
673             + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol))
674                                 : (Twine(", r_scattered=1, r_value=")
675                                    + Twine(reloc.value)))
676             + ")" );
677          });
678      }
679    }
680    // Instantiate an lld::Reference object and add to its atom.
681    inAtom->addReference(Reference::KindNamespace::mach_o,
682                         handler.kindArch(),
683                         kind, offsetInAtom, target, addend);
684  }
685
686  return llvm::Error::success();
687}
688
689bool isDebugInfoSection(const Section &section) {
690  if ((section.attributes & S_ATTR_DEBUG) == 0)
691    return false;
692  return section.segmentName.equals("__DWARF");
693}
694
695static const Atom* findDefinedAtomByName(MachOFile &file, Twine name) {
696  std::string strName = name.str();
697  for (auto *atom : file.defined())
698    if (atom->name() == strName)
699      return atom;
700  return nullptr;
701}
702
703static StringRef copyDebugString(StringRef str, BumpPtrAllocator &alloc) {
704  char *strCopy = alloc.Allocate<char>(str.size() + 1);
705  memcpy(strCopy, str.data(), str.size());
706  strCopy[str.size()] = '\0';
707  return strCopy;
708}
709
710llvm::Error parseStabs(MachOFile &file,
711                       const NormalizedFile &normalizedFile,
712                       bool copyRefs) {
713
714  if (normalizedFile.stabsSymbols.empty())
715    return llvm::Error::success();
716
717  // FIXME: Kill this off when we can move to sane yaml parsing.
718  std::unique_ptr<BumpPtrAllocator> allocator;
719  if (copyRefs)
720    allocator = std::make_unique<BumpPtrAllocator>();
721
722  enum { start, inBeginEnd } state = start;
723
724  const Atom *currentAtom = nullptr;
725  uint64_t currentAtomAddress = 0;
726  StabsDebugInfo::StabsList stabsList;
727  for (const auto &stabSym : normalizedFile.stabsSymbols) {
728    Stab stab(nullptr, stabSym.type, stabSym.sect, stabSym.desc,
729              stabSym.value, stabSym.name);
730    switch (state) {
731    case start:
732      switch (static_cast<StabType>(stabSym.type)) {
733      case N_BNSYM:
734        state = inBeginEnd;
735        currentAtomAddress = stabSym.value;
736        Reference::Addend addend;
737        currentAtom = findAtomCoveringAddress(normalizedFile, file,
738                                              currentAtomAddress, addend);
739        if (addend != 0)
740          return llvm::make_error<GenericError>(
741                   "Non-zero addend for BNSYM '" + stabSym.name + "' in " +
742                   file.path());
743        if (currentAtom)
744          stab.atom = currentAtom;
745        else {
746          // FIXME: ld64 just issues a warning here - should we match that?
747          return llvm::make_error<GenericError>(
748                   "can't find atom for stabs BNSYM at " +
749                   Twine::utohexstr(stabSym.value) + " in " + file.path());
750        }
751        break;
752      case N_SO:
753      case N_OSO:
754        // Not associated with an atom, just copy.
755        if (copyRefs)
756          stab.str = copyDebugString(stabSym.name, *allocator);
757        else
758          stab.str = stabSym.name;
759        break;
760      case N_GSYM: {
761        auto colonIdx = stabSym.name.find(':');
762        if (colonIdx != StringRef::npos) {
763          StringRef name = stabSym.name.substr(0, colonIdx);
764          currentAtom = findDefinedAtomByName(file, "_" + name);
765          stab.atom = currentAtom;
766          if (copyRefs)
767            stab.str = copyDebugString(stabSym.name, *allocator);
768          else
769            stab.str = stabSym.name;
770        } else {
771          currentAtom = findDefinedAtomByName(file, stabSym.name);
772          stab.atom = currentAtom;
773          if (copyRefs)
774            stab.str = copyDebugString(stabSym.name, *allocator);
775          else
776            stab.str = stabSym.name;
777        }
778        if (stab.atom == nullptr)
779          return llvm::make_error<GenericError>(
780                   "can't find atom for N_GSYM stabs" + stabSym.name +
781                   " in " + file.path());
782        break;
783      }
784      case N_FUN:
785        return llvm::make_error<GenericError>(
786                 "old-style N_FUN stab '" + stabSym.name + "' unsupported");
787      default:
788        return llvm::make_error<GenericError>(
789                 "unrecognized stab symbol '" + stabSym.name + "'");
790      }
791      break;
792    case inBeginEnd:
793      stab.atom = currentAtom;
794      switch (static_cast<StabType>(stabSym.type)) {
795      case N_ENSYM:
796        state = start;
797        currentAtom = nullptr;
798        break;
799      case N_FUN:
800        // Just copy the string.
801        if (copyRefs)
802          stab.str = copyDebugString(stabSym.name, *allocator);
803        else
804          stab.str = stabSym.name;
805        break;
806      default:
807        return llvm::make_error<GenericError>(
808                 "unrecognized stab symbol '" + stabSym.name + "'");
809      }
810    }
811    llvm::dbgs() << "Adding to stabsList: " << stab << "\n";
812    stabsList.push_back(stab);
813  }
814
815  file.setDebugInfo(std::make_unique<StabsDebugInfo>(std::move(stabsList)));
816
817  // FIXME: Kill this off when we fix YAML memory ownership.
818  file.debugInfo()->setAllocator(std::move(allocator));
819
820  return llvm::Error::success();
821}
822
823static llvm::DataExtractor
824dataExtractorFromSection(const NormalizedFile &normalizedFile,
825                         const Section &S) {
826  const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
827  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
828  StringRef SecData(reinterpret_cast<const char*>(S.content.data()),
829                    S.content.size());
830  return llvm::DataExtractor(SecData, !isBig, is64 ? 8 : 4);
831}
832
833// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
834//        inspection" code if possible.
835static uint64_t getCUAbbrevOffset(llvm::DataExtractor abbrevData,
836                                  uint64_t abbrCode) {
837  uint64_t curCode;
838  uint64_t offset = 0;
839  while ((curCode = abbrevData.getULEB128(&offset)) != abbrCode) {
840    // Tag
841    abbrevData.getULEB128(&offset);
842    // DW_CHILDREN
843    abbrevData.getU8(&offset);
844    // Attributes
845    while (abbrevData.getULEB128(&offset) | abbrevData.getULEB128(&offset))
846      ;
847  }
848  return offset;
849}
850
851// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
852//        inspection" code if possible.
853static Expected<const char *>
854getIndexedString(const NormalizedFile &normalizedFile,
855                 llvm::dwarf::Form form, llvm::DataExtractor infoData,
856                 uint64_t &infoOffset, const Section &stringsSection) {
857  if (form == llvm::dwarf::DW_FORM_string)
858   return infoData.getCStr(&infoOffset);
859  if (form != llvm::dwarf::DW_FORM_strp)
860    return llvm::make_error<GenericError>(
861        "string field encoded without DW_FORM_strp");
862  uint64_t stringOffset = infoData.getU32(&infoOffset);
863  llvm::DataExtractor stringsData =
864    dataExtractorFromSection(normalizedFile, stringsSection);
865  return stringsData.getCStr(&stringOffset);
866}
867
868// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
869//        inspection" code if possible.
870static llvm::Expected<TranslationUnitSource>
871readCompUnit(const NormalizedFile &normalizedFile,
872             const Section &info,
873             const Section &abbrev,
874             const Section &strings,
875             StringRef path) {
876  // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
877  //        inspection" code if possible.
878  uint64_t offset = 0;
879  llvm::dwarf::DwarfFormat Format = llvm::dwarf::DwarfFormat::DWARF32;
880  auto infoData = dataExtractorFromSection(normalizedFile, info);
881  uint32_t length = infoData.getU32(&offset);
882  if (length == llvm::dwarf::DW_LENGTH_DWARF64) {
883    Format = llvm::dwarf::DwarfFormat::DWARF64;
884    infoData.getU64(&offset);
885  }
886  else if (length >= llvm::dwarf::DW_LENGTH_lo_reserved)
887    return llvm::make_error<GenericError>("Malformed DWARF in " + path);
888
889  uint16_t version = infoData.getU16(&offset);
890
891  if (version < 2 || version > 4)
892    return llvm::make_error<GenericError>("Unsupported DWARF version in " +
893                                          path);
894
895  infoData.getU32(&offset); // Abbrev offset (should be zero)
896  uint8_t addrSize = infoData.getU8(&offset);
897
898  uint32_t abbrCode = infoData.getULEB128(&offset);
899  auto abbrevData = dataExtractorFromSection(normalizedFile, abbrev);
900  uint64_t abbrevOffset = getCUAbbrevOffset(abbrevData, abbrCode);
901  uint64_t tag = abbrevData.getULEB128(&abbrevOffset);
902  if (tag != llvm::dwarf::DW_TAG_compile_unit)
903    return llvm::make_error<GenericError>("top level DIE is not a compile unit");
904  // DW_CHILDREN
905  abbrevData.getU8(&abbrevOffset);
906  uint32_t name;
907  llvm::dwarf::Form form;
908  llvm::dwarf::FormParams formParams = {version, addrSize, Format};
909  TranslationUnitSource tu;
910  while ((name = abbrevData.getULEB128(&abbrevOffset)) |
911         (form = static_cast<llvm::dwarf::Form>(
912             abbrevData.getULEB128(&abbrevOffset))) &&
913         (name != 0 || form != 0)) {
914    switch (name) {
915    case llvm::dwarf::DW_AT_name: {
916      if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
917                                        strings))
918          tu.name = *eName;
919      else
920        return eName.takeError();
921      break;
922    }
923    case llvm::dwarf::DW_AT_comp_dir: {
924      if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
925                                        strings))
926        tu.path = *eName;
927      else
928        return eName.takeError();
929      break;
930    }
931    default:
932      llvm::DWARFFormValue::skipValue(form, infoData, &offset, formParams);
933    }
934  }
935  return tu;
936}
937
938llvm::Error parseDebugInfo(MachOFile &file,
939                           const NormalizedFile &normalizedFile, bool copyRefs) {
940
941  // Find the interesting debug info sections.
942  const Section *debugInfo = nullptr;
943  const Section *debugAbbrev = nullptr;
944  const Section *debugStrings = nullptr;
945
946  for (auto &s : normalizedFile.sections) {
947    if (s.segmentName == "__DWARF") {
948      if (s.sectionName == "__debug_info")
949        debugInfo = &s;
950      else if (s.sectionName == "__debug_abbrev")
951        debugAbbrev = &s;
952      else if (s.sectionName == "__debug_str")
953        debugStrings = &s;
954    }
955  }
956
957  if (!debugInfo)
958    return parseStabs(file, normalizedFile, copyRefs);
959
960  if (debugInfo->content.size() == 0)
961    return llvm::Error::success();
962
963  if (debugInfo->content.size() < 12)
964    return llvm::make_error<GenericError>("Malformed __debug_info section in " +
965                                          file.path() + ": too small");
966
967  if (!debugAbbrev)
968    return llvm::make_error<GenericError>("Missing __dwarf_abbrev section in " +
969                                          file.path());
970
971  if (auto tuOrErr = readCompUnit(normalizedFile, *debugInfo, *debugAbbrev,
972                                  *debugStrings, file.path())) {
973    // FIXME: Kill of allocator and code under 'copyRefs' when we fix YAML
974    //        memory ownership.
975    std::unique_ptr<BumpPtrAllocator> allocator;
976    if (copyRefs) {
977      allocator = std::make_unique<BumpPtrAllocator>();
978      tuOrErr->name = copyDebugString(tuOrErr->name, *allocator);
979      tuOrErr->path = copyDebugString(tuOrErr->path, *allocator);
980    }
981    file.setDebugInfo(std::make_unique<DwarfDebugInfo>(std::move(*tuOrErr)));
982    if (copyRefs)
983      file.debugInfo()->setAllocator(std::move(allocator));
984  } else
985    return tuOrErr.takeError();
986
987  return llvm::Error::success();
988}
989
990static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) {
991  if (is64)
992    return read64(addr, isBig);
993
994  int32_t res = read32(addr, isBig);
995  return res;
996}
997
998/// --- Augmentation String Processing ---
999
/// Facts gathered from the augmentation string of a CIE. Offsets are relative
/// to the start of the augmentation data; ~0U marks a value that was not set.
struct CIEInfo {
  /// Set when the augmentation string starts with 'z'.
  bool _augmentationDataPresent{false};
  /// Set when the augmentation string contains "eh".
  bool _mayHaveEH{false};
  /// Offset recorded for an 'L' entry.
  uint32_t _offsetOfLSDA{~0U};
  /// Offset recorded for a 'P' entry.
  uint32_t _offsetOfPersonality{~0U};
  /// Offset recorded for an 'R' entry.
  uint32_t _offsetOfFDEPointerEncoding{~0U};
  /// Total size computed for the augmentation data.
  uint32_t _augmentationDataLength{~0U};
};
1008
1009typedef llvm::DenseMap<const MachODefinedAtom*, CIEInfo> CIEInfoMap;
1010
1011static llvm::Error processAugmentationString(const uint8_t *augStr,
1012                                             CIEInfo &cieInfo,
1013                                             unsigned &len) {
1014
1015  if (augStr[0] == '\0') {
1016    len = 1;
1017    return llvm::Error::success();
1018  }
1019
1020  if (augStr[0] != 'z')
1021    return llvm::make_error<GenericError>("expected 'z' at start of "
1022                                          "augmentation string");
1023
1024  cieInfo._augmentationDataPresent = true;
1025  uint64_t idx = 1;
1026
1027  uint32_t offsetInAugmentationData = 0;
1028  while (augStr[idx] != '\0') {
1029    if (augStr[idx] == 'L') {
1030      cieInfo._offsetOfLSDA = offsetInAugmentationData;
1031      // This adds a single byte to the augmentation data.
1032      ++offsetInAugmentationData;
1033      ++idx;
1034      continue;
1035    }
1036    if (augStr[idx] == 'P') {
1037      cieInfo._offsetOfPersonality = offsetInAugmentationData;
1038      // This adds a single byte to the augmentation data for the encoding,
1039      // then a number of bytes for the pointer data.
1040      // FIXME: We are assuming 4 is correct here for the pointer size as we
1041      // always currently use delta32ToGOT.
1042      offsetInAugmentationData += 5;
1043      ++idx;
1044      continue;
1045    }
1046    if (augStr[idx] == 'R') {
1047      cieInfo._offsetOfFDEPointerEncoding = offsetInAugmentationData;
1048      // This adds a single byte to the augmentation data.
1049      ++offsetInAugmentationData;
1050      ++idx;
1051      continue;
1052    }
1053    if (augStr[idx] == 'e') {
1054      if (augStr[idx + 1] != 'h')
1055        return llvm::make_error<GenericError>("expected 'eh' in "
1056                                              "augmentation string");
1057      cieInfo._mayHaveEH = true;
1058      idx += 2;
1059      continue;
1060    }
1061    ++idx;
1062  }
1063
1064  cieInfo._augmentationDataLength = offsetInAugmentationData;
1065
1066  len = idx + 1;
1067  return llvm::Error::success();
1068}
1069
1070static llvm::Error processCIE(const NormalizedFile &normalizedFile,
1071                              MachOFile &file,
1072                              mach_o::ArchHandler &handler,
1073                              const Section *ehFrameSection,
1074                              MachODefinedAtom *atom,
1075                              uint64_t offset,
1076                              CIEInfoMap &cieInfos) {
1077  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1078  const uint8_t *frameData = atom->rawContent().data();
1079
1080  CIEInfo cieInfo;
1081
1082  uint32_t size = read32(frameData, isBig);
1083  uint64_t cieIDField = size == 0xffffffffU
1084                          ? sizeof(uint32_t) + sizeof(uint64_t)
1085                          : sizeof(uint32_t);
1086  uint64_t versionField = cieIDField + sizeof(uint32_t);
1087  uint64_t augmentationStringField = versionField + sizeof(uint8_t);
1088
1089  unsigned augmentationStringLength = 0;
1090  if (auto err = processAugmentationString(frameData + augmentationStringField,
1091                                           cieInfo, augmentationStringLength))
1092    return err;
1093
1094  if (cieInfo._offsetOfPersonality != ~0U) {
1095    // If we have augmentation data for the personality function, then we may
1096    // need to implicitly generate its relocation.
1097
1098    // Parse the EH Data field which is pointer sized.
1099    uint64_t EHDataField = augmentationStringField + augmentationStringLength;
1100    const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
1101    unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0);
1102
1103    // Parse Code Align Factor which is a ULEB128.
1104    uint64_t CodeAlignField = EHDataField + EHDataFieldSize;
1105    unsigned lengthFieldSize = 0;
1106    llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize);
1107
1108    // Parse Data Align Factor which is a SLEB128.
1109    uint64_t DataAlignField = CodeAlignField + lengthFieldSize;
1110    llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize);
1111
1112    // Parse Return Address Register which is a byte.
1113    uint64_t ReturnAddressField = DataAlignField + lengthFieldSize;
1114
1115    // Parse the augmentation length which is a ULEB128.
1116    uint64_t AugmentationLengthField = ReturnAddressField + 1;
1117    uint64_t AugmentationLength =
1118      llvm::decodeULEB128(frameData + AugmentationLengthField,
1119                          &lengthFieldSize);
1120
1121    if (AugmentationLength != cieInfo._augmentationDataLength)
1122      return llvm::make_error<GenericError>("CIE augmentation data length "
1123                                            "mismatch");
1124
1125    // Get the start address of the augmentation data.
1126    uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize;
1127
1128    // Parse the personality function from the augmentation data.
1129    uint64_t PersonalityField =
1130      AugmentationDataField + cieInfo._offsetOfPersonality;
1131
1132    // Parse the personality encoding.
1133    // FIXME: Verify that this is a 32-bit pcrel offset.
1134    uint64_t PersonalityFunctionField = PersonalityField + 1;
1135
1136    if (atom->begin() != atom->end()) {
1137      // If we have an explicit relocation, then make sure it matches this
1138      // offset as this is where we'd expect it to be applied to.
1139      DefinedAtom::reference_iterator CurrentRef = atom->begin();
1140      if (CurrentRef->offsetInAtom() != PersonalityFunctionField)
1141        return llvm::make_error<GenericError>("CIE personality reloc at "
1142                                              "wrong offset");
1143
1144      if (++CurrentRef != atom->end())
1145        return llvm::make_error<GenericError>("CIE contains too many relocs");
1146    } else {
1147      // Implicitly generate the personality function reloc.  It's assumed to
1148      // be a delta32 offset to a GOT entry.
1149      // FIXME: Parse the encoding and check this.
1150      int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig);
1151      uint64_t funcAddress = ehFrameSection->address + offset +
1152                             PersonalityFunctionField;
1153      funcAddress += funcDelta;
1154
1155      const MachODefinedAtom *func = nullptr;
1156      Reference::Addend addend;
1157      func = findAtomCoveringAddress(normalizedFile, file, funcAddress,
1158                                     addend);
1159      atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
1160                         handler.unwindRefToPersonalityFunctionKind(),
1161                         PersonalityFunctionField, func, addend);
1162    }
1163  } else if (atom->begin() != atom->end()) {
1164    // Otherwise, we expect there to be no relocations in this atom as the only
1165    // relocation would have been to the personality function.
1166    return llvm::make_error<GenericError>("unexpected relocation in CIE");
1167  }
1168
1169
1170  cieInfos[atom] = std::move(cieInfo);
1171
1172  return llvm::Error::success();
1173}
1174
1175static llvm::Error processFDE(const NormalizedFile &normalizedFile,
1176                              MachOFile &file,
1177                              mach_o::ArchHandler &handler,
1178                              const Section *ehFrameSection,
1179                              MachODefinedAtom *atom,
1180                              uint64_t offset,
1181                              const CIEInfoMap &cieInfos) {
1182
1183  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1184  const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
1185
1186  // Compiler wasn't lazy and actually told us what it meant.
1187  // Unfortunately, the compiler may not have generated references for all of
1188  // [cie, func, lsda] and so we still need to parse the FDE and add references
1189  // for any the compiler didn't generate.
1190  if (atom->begin() != atom->end())
1191    atom->sortReferences();
1192
1193  DefinedAtom::reference_iterator CurrentRef = atom->begin();
1194
1195  // This helper returns the reference (if one exists) at the offset we are
1196  // currently processing.  It automatically increments the ref iterator if we
1197  // do return a ref, and throws an error if we pass over a ref without
1198  // comsuming it.
1199  auto currentRefGetter = [&CurrentRef,
1200                           &atom](uint64_t Offset)->const Reference* {
1201    // If there are no more refs found, then we are done.
1202    if (CurrentRef == atom->end())
1203      return nullptr;
1204
1205    const Reference *Ref = *CurrentRef;
1206
1207    // If we haven't reached the offset for this reference, then return that
1208    // we don't yet have a reference to process.
1209    if (Offset < Ref->offsetInAtom())
1210      return nullptr;
1211
1212    // If the offset is equal, then we want to process this ref.
1213    if (Offset == Ref->offsetInAtom()) {
1214      ++CurrentRef;
1215      return Ref;
1216    }
1217
1218    // The current ref is at an offset which is earlier than the current
1219    // offset, then we failed to consume it when we should have.  In this case
1220    // throw an error.
1221    llvm::report_fatal_error("Skipped reference when processing FDE");
1222  };
1223
1224  // Helper to either get the reference at this current location, and verify
1225  // that it is of the expected type, or add a reference of that type.
1226  // Returns the reference target.
1227  auto verifyOrAddReference = [&](uint64_t targetAddress,
1228                                  Reference::KindValue refKind,
1229                                  uint64_t refAddress,
1230                                  bool allowsAddend)->const Atom* {
1231    if (auto *ref = currentRefGetter(refAddress)) {
1232      // The compiler already emitted a relocation for the CIE ref.  This should
1233      // have been converted to the correct type of reference in
1234      // get[Pair]ReferenceInfo().
1235      assert(ref->kindValue() == refKind &&
1236             "Incorrect EHFrame reference kind");
1237      return ref->target();
1238    }
1239    Reference::Addend addend;
1240    auto *target = findAtomCoveringAddress(normalizedFile, file,
1241                                           targetAddress, addend);
1242    atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
1243                       refKind, refAddress, target, addend);
1244
1245    if (!allowsAddend)
1246      assert(!addend && "EHFrame reference cannot have addend");
1247    return target;
1248  };
1249
1250  const uint8_t *startFrameData = atom->rawContent().data();
1251  const uint8_t *frameData = startFrameData;
1252
1253  uint32_t size = read32(frameData, isBig);
1254  uint64_t cieFieldInFDE = size == 0xffffffffU
1255    ? sizeof(uint32_t) + sizeof(uint64_t)
1256    : sizeof(uint32_t);
1257
1258  // Linker needs to fixup a reference from the FDE to its parent CIE (a
1259  // 32-bit byte offset backwards in the __eh_frame section).
1260  uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig);
1261  uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE;
1262  cieAddress -= cieDelta;
1263
1264  auto *cieRefTarget = verifyOrAddReference(cieAddress,
1265                                            handler.unwindRefToCIEKind(),
1266                                            cieFieldInFDE, false);
1267  const MachODefinedAtom *cie = dyn_cast<MachODefinedAtom>(cieRefTarget);
1268  assert(cie && cie->contentType() == DefinedAtom::typeCFI &&
1269         "FDE's CIE field does not point at the start of a CIE.");
1270
1271  const CIEInfo &cieInfo = cieInfos.find(cie)->second;
1272
1273  // Linker needs to fixup reference from the FDE to the function it's
1274  // describing. FIXME: there are actually different ways to do this, and the
1275  // particular method used is specified in the CIE's augmentation fields
1276  // (hopefully)
1277  uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t);
1278
1279  int64_t functionFromFDE = readSPtr(is64, isBig,
1280                                     frameData + rangeFieldInFDE);
1281  uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE;
1282  rangeStart += functionFromFDE;
1283
1284  verifyOrAddReference(rangeStart,
1285                       handler.unwindRefToFunctionKind(),
1286                       rangeFieldInFDE, true);
1287
1288  // Handle the augmentation data if there is any.
1289  if (cieInfo._augmentationDataPresent) {
1290    // First process the augmentation data length field.
1291    uint64_t augmentationDataLengthFieldInFDE =
1292      rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t));
1293    unsigned lengthFieldSize = 0;
1294    uint64_t augmentationDataLength =
1295      llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE,
1296                          &lengthFieldSize);
1297
1298    if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) {
1299
1300      // Look at the augmentation data field.
1301      uint64_t augmentationDataFieldInFDE =
1302        augmentationDataLengthFieldInFDE + lengthFieldSize;
1303
1304      int64_t lsdaFromFDE = readSPtr(is64, isBig,
1305                                     frameData + augmentationDataFieldInFDE);
1306      uint64_t lsdaStart =
1307        ehFrameSection->address + offset + augmentationDataFieldInFDE +
1308        lsdaFromFDE;
1309
1310      verifyOrAddReference(lsdaStart,
1311                           handler.unwindRefToFunctionKind(),
1312                           augmentationDataFieldInFDE, true);
1313    }
1314  }
1315
1316  return llvm::Error::success();
1317}
1318
1319llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile,
1320                                 MachOFile &file,
1321                                 mach_o::ArchHandler &handler) {
1322
1323  const Section *ehFrameSection = nullptr;
1324  for (auto &section : normalizedFile.sections)
1325    if (section.segmentName == "__TEXT" &&
1326        section.sectionName == "__eh_frame") {
1327      ehFrameSection = &section;
1328      break;
1329    }
1330
1331  // No __eh_frame so nothing to do.
1332  if (!ehFrameSection)
1333    return llvm::Error::success();
1334
1335  llvm::Error ehFrameErr = llvm::Error::success();
1336  CIEInfoMap cieInfos;
1337
1338  file.eachAtomInSection(*ehFrameSection,
1339                         [&](MachODefinedAtom *atom, uint64_t offset) -> void {
1340    assert(atom->contentType() == DefinedAtom::typeCFI);
1341
1342    // Bail out if we've encountered an error.
1343    if (ehFrameErr)
1344      return;
1345
1346    const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1347    if (ArchHandler::isDwarfCIE(isBig, atom))
1348      ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection,
1349                              atom, offset, cieInfos);
1350    else
1351      ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection,
1352                              atom, offset, cieInfos);
1353  });
1354
1355  return ehFrameErr;
1356}
1357
1358llvm::Error parseObjCImageInfo(const Section &sect,
1359                               const NormalizedFile &normalizedFile,
1360                               MachOFile &file) {
1361
1362  //	struct objc_image_info  {
1363  //		uint32_t	version;	// initially 0
1364  //		uint32_t	flags;
1365  //	};
1366
1367  ArrayRef<uint8_t> content = sect.content;
1368  if (content.size() != 8)
1369    return llvm::make_error<GenericError>(sect.segmentName + "/" +
1370                                          sect.sectionName +
1371                                          " in file " + file.path() +
1372                                          " should be 8 bytes in size");
1373
1374  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1375  uint32_t version = read32(content.data(), isBig);
1376  if (version)
1377    return llvm::make_error<GenericError>(sect.segmentName + "/" +
1378                                          sect.sectionName +
1379                                          " in file " + file.path() +
1380                                          " should have version=0");
1381
1382  uint32_t flags = read32(content.data() + 4, isBig);
1383  if (flags & (MachOLinkingContext::objc_supports_gc |
1384               MachOLinkingContext::objc_gc_only))
1385    return llvm::make_error<GenericError>(sect.segmentName + "/" +
1386                                          sect.sectionName +
1387                                          " in file " + file.path() +
1388                                          " uses GC.  This is not supported");
1389
1390  if (flags & MachOLinkingContext::objc_retainReleaseForSimulator)
1391    file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator);
1392  else
1393    file.setObjcConstraint(MachOLinkingContext::objc_retainRelease);
1394
1395  file.setSwiftVersion((flags >> 8) & 0xFF);
1396
1397  return llvm::Error::success();
1398}
1399
1400/// Converts normalized mach-o file into an lld::File and lld::Atoms.
1401llvm::Expected<std::unique_ptr<lld::File>>
1402objectToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1403              bool copyRefs) {
1404  std::unique_ptr<MachOFile> file(new MachOFile(path));
1405  if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs))
1406    return std::move(ec);
1407  return std::unique_ptr<File>(std::move(file));
1408}
1409
1410llvm::Expected<std::unique_ptr<lld::File>>
1411dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1412             bool copyRefs) {
1413  // Instantiate SharedLibraryFile object.
1414  std::unique_ptr<MachODylibFile> file(new MachODylibFile(path));
1415  if (auto ec = normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs))
1416    return std::move(ec);
1417  return std::unique_ptr<File>(std::move(file));
1418}
1419
1420} // anonymous namespace
1421
1422namespace normalized {
1423
1424static bool isObjCImageInfo(const Section &sect) {
1425  return (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info") ||
1426    (sect.segmentName == "__DATA" && sect.sectionName == "__objc_imageinfo");
1427}
1428
1429llvm::Error
1430normalizedObjectToAtoms(MachOFile *file,
1431                        const NormalizedFile &normalizedFile,
1432                        bool copyRefs) {
1433  LLVM_DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: "
1434                          << file->path() << "\n");
1435  bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0);
1436
1437  // Create atoms from each section.
1438  for (auto &sect : normalizedFile.sections) {
1439
1440    // If this is a debug-info section parse it specially.
1441    if (isDebugInfoSection(sect))
1442      continue;
1443
1444    // If the file contains an objc_image_info struct, then we should parse the
1445    // ObjC flags and Swift version.
1446    if (isObjCImageInfo(sect)) {
1447      if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file))
1448        return ec;
1449      // We then skip adding atoms for this section as we use the ObjCPass to
1450      // re-emit this data after it has been aggregated for all files.
1451      continue;
1452    }
1453
1454    bool customSectionName;
1455    DefinedAtom::ContentType atomType = atomTypeFromSection(sect,
1456                                                            customSectionName);
1457    if (auto ec =  processSection(atomType, sect, customSectionName,
1458                                  normalizedFile, *file, scatterable, copyRefs))
1459      return ec;
1460  }
1461  // Create atoms from undefined symbols.
1462  for (auto &sym : normalizedFile.undefinedSymbols) {
1463    // Undefined symbols with n_value != 0 are actually tentative definitions.
1464    if (sym.value == Hex64(0)) {
1465      file->addUndefinedAtom(sym.name, copyRefs);
1466    } else {
1467      file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value,
1468                                DefinedAtom::Alignment(1 << (sym.desc >> 8)),
1469                                copyRefs);
1470    }
1471  }
1472
1473  // Convert mach-o relocations to References
1474  std::unique_ptr<mach_o::ArchHandler> handler
1475                                     = ArchHandler::create(normalizedFile.arch);
1476  for (auto &sect : normalizedFile.sections) {
1477    if (isDebugInfoSection(sect))
1478      continue;
1479    if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable,
1480                                       *file, *handler))
1481      return ec;
1482  }
1483
1484  // Add additional arch-specific References
1485  file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void {
1486    handler->addAdditionalReferences(*atom);
1487  });
1488
1489  // Each __eh_frame section needs references to both __text (the function we're
1490  // providing unwind info for) and itself (FDE -> CIE). These aren't
1491  // represented in the relocations on some architectures, so we have to add
1492  // them back in manually there.
1493  if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler))
1494    return ec;
1495
1496  // Process mach-o data-in-code regions array. That information is encoded in
1497  // atoms as References at each transition point.
1498  unsigned nextIndex = 0;
1499  for (const DataInCode &entry : normalizedFile.dataInCode) {
1500    ++nextIndex;
1501    const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset);
1502    if (!s) {
1503      return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address ("
1504                                                  + Twine(entry.offset)
1505                                                  + ") is not in any section"));
1506    }
1507    uint64_t offsetInSect = entry.offset - s->address;
1508    uint32_t offsetInAtom;
1509    MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect,
1510                                                           &offsetInAtom);
1511    if (offsetInAtom + entry.length > atom->size()) {
1512      return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE entry "
1513                                                  "(offset="
1514                                                  + Twine(entry.offset)
1515                                                  + ", length="
1516                                                  + Twine(entry.length)
1517                                                  + ") crosses atom boundary."));
1518    }
1519    // Add reference that marks start of data-in-code.
1520    atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
1521                       handler->dataInCodeTransitionStart(*atom),
1522                       offsetInAtom, atom, entry.kind);
1523
1524    // Peek at next entry, if it starts where this one ends, skip ending ref.
1525    if (nextIndex < normalizedFile.dataInCode.size()) {
1526      const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex];
1527      if (nextEntry.offset == (entry.offset + entry.length))
1528        continue;
1529    }
1530
1531    // If data goes to end of function, skip ending ref.
1532    if ((offsetInAtom + entry.length) == atom->size())
1533      continue;
1534
1535    // Add reference that marks end of data-in-code.
1536    atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
1537                       handler->dataInCodeTransitionEnd(*atom),
1538                       offsetInAtom+entry.length, atom, 0);
1539  }
1540
1541  // Cache some attributes on the file for use later.
1542  file->setFlags(normalizedFile.flags);
1543  file->setArch(normalizedFile.arch);
1544  file->setOS(normalizedFile.os);
1545  file->setMinVersion(normalizedFile.minOSverson);
1546  file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind);
1547
1548  // Sort references in each atom to their canonical order.
1549  for (const DefinedAtom* defAtom : file->defined()) {
1550    reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences();
1551  }
1552
1553  if (auto err = parseDebugInfo(*file, normalizedFile, copyRefs))
1554    return err;
1555
1556  return llvm::Error::success();
1557}
1558
1559llvm::Error
1560normalizedDylibToAtoms(MachODylibFile *file,
1561                       const NormalizedFile &normalizedFile,
1562                       bool copyRefs) {
1563  file->setInstallName(normalizedFile.installName);
1564  file->setCompatVersion(normalizedFile.compatVersion);
1565  file->setCurrentVersion(normalizedFile.currentVersion);
1566
1567  // Tell MachODylibFile object about all symbols it exports.
1568  if (!normalizedFile.exportInfo.empty()) {
1569    // If exports trie exists, use it instead of traditional symbol table.
1570    for (const Export &exp : normalizedFile.exportInfo) {
1571      bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION);
1572      // StringRefs from export iterator are ephemeral, so force copy.
1573      file->addExportedSymbol(exp.name, weakDef, true);
1574    }
1575  } else {
1576    for (auto &sym : normalizedFile.globalSymbols) {
1577      assert((sym.scope & N_EXT) && "only expect external symbols here");
1578      bool weakDef = (sym.desc & N_WEAK_DEF);
1579      file->addExportedSymbol(sym.name, weakDef, copyRefs);
1580    }
1581  }
1582  // Tell MachODylibFile object about all dylibs it re-exports.
1583  for (const DependentDylib &dep : normalizedFile.dependentDylibs) {
1584    if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB)
1585      file->addReExportedDylib(dep.path);
1586  }
1587  return llvm::Error::success();
1588}
1589
1590void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType,
1591                                          StringRef &segmentName,
1592                                          StringRef &sectionName,
1593                                          SectionType &sectionType,
1594                                          SectionAttr &sectionAttrs,
1595                                          bool &relocsToDefinedCanBeImplicit) {
1596
1597  for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
1598                                 p->atomType != DefinedAtom::typeUnknown; ++p) {
1599    if (p->atomType != atomType)
1600      continue;
1601    // Wild carded entries are ignored for reverse lookups.
1602    if (p->segmentName.empty() || p->sectionName.empty())
1603      continue;
1604    segmentName = p->segmentName;
1605    sectionName = p->sectionName;
1606    sectionType = p->sectionType;
1607    sectionAttrs = 0;
1608    relocsToDefinedCanBeImplicit = false;
1609    if (atomType == DefinedAtom::typeCode)
1610      sectionAttrs = S_ATTR_PURE_INSTRUCTIONS;
1611    if (atomType == DefinedAtom::typeCFI)
1612      relocsToDefinedCanBeImplicit = true;
1613    return;
1614  }
1615  llvm_unreachable("content type not yet supported");
1616}
1617
1618llvm::Expected<std::unique_ptr<lld::File>>
1619normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1620                  bool copyRefs) {
1621  switch (normalizedFile.fileType) {
1622  case MH_DYLIB:
1623  case MH_DYLIB_STUB:
1624    return dylibToAtoms(normalizedFile, path, copyRefs);
1625  case MH_OBJECT:
1626    return objectToAtoms(normalizedFile, path, copyRefs);
1627  default:
1628    llvm_unreachable("unhandled MachO file type!");
1629  }
1630}
1631
1632} // namespace normalized
1633} // namespace mach_o
1634} // namespace lld
1635