1//===- lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9///
10/// \file Converts from in-memory Atoms to in-memory normalized mach-o.
11///
12///                  +------------+
13///                  | normalized |
14///                  +------------+
15///                        ^
16///                        |
17///                        |
18///                    +-------+
19///                    | Atoms |
20///                    +-------+
21
22#include "ArchHandler.h"
23#include "DebugInfo.h"
24#include "MachONormalizedFile.h"
25#include "MachONormalizedFileBinaryUtils.h"
26#include "lld/Common/LLVM.h"
27#include "lld/Core/Error.h"
28#include "llvm/ADT/StringRef.h"
29#include "llvm/ADT/StringSwitch.h"
30#include "llvm/BinaryFormat/MachO.h"
31#include "llvm/Support/Casting.h"
32#include "llvm/Support/Debug.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/Support/Format.h"
35#include <map>
36#include <system_error>
37#include <unordered_set>
38
39using llvm::StringRef;
40using llvm::isa;
41using namespace llvm::MachO;
42using namespace lld::mach_o::normalized;
43using namespace lld;
44
45namespace {
46
47struct AtomInfo {
48  const DefinedAtom  *atom;
49  uint64_t            offsetInSection;
50};
51
52struct SectionInfo {
53  SectionInfo(StringRef seg, StringRef sect, SectionType type,
54              const MachOLinkingContext &ctxt, uint32_t attr,
55              bool relocsToDefinedCanBeImplicit);
56
57  StringRef                 segmentName;
58  StringRef                 sectionName;
59  SectionType               type;
60  uint32_t                  attributes;
61  uint64_t                  address;
62  uint64_t                  size;
63  uint16_t                  alignment;
64
65  /// If this is set, the any relocs in this section which point to defined
66  /// addresses can be implicitly generated.  This is the case for the
67  /// __eh_frame section where references to the function can be implicit if the
68  /// function is defined.
69  bool                      relocsToDefinedCanBeImplicit;
70
71
72  std::vector<AtomInfo>     atomsAndOffsets;
73  uint32_t                  normalizedSectionIndex;
74  uint32_t                  finalSectionIndex;
75};
76
77SectionInfo::SectionInfo(StringRef sg, StringRef sct, SectionType t,
78                         const MachOLinkingContext &ctxt, uint32_t attrs,
79                         bool relocsToDefinedCanBeImplicit)
80 : segmentName(sg), sectionName(sct), type(t), attributes(attrs),
81                 address(0), size(0), alignment(1),
82                 relocsToDefinedCanBeImplicit(relocsToDefinedCanBeImplicit),
83                 normalizedSectionIndex(0), finalSectionIndex(0) {
84  uint16_t align = 1;
85  if (ctxt.sectionAligned(segmentName, sectionName, align)) {
86    alignment = align;
87  }
88}
89
90struct SegmentInfo {
91  SegmentInfo(StringRef name);
92
93  StringRef                  name;
94  uint64_t                   address;
95  uint64_t                   size;
96  uint32_t                   init_access;
97  uint32_t                   max_access;
98  std::vector<SectionInfo*>  sections;
99  uint32_t                   normalizedSegmentIndex;
100};
101
102SegmentInfo::SegmentInfo(StringRef n)
103 : name(n), address(0), size(0), init_access(0), max_access(0),
104   normalizedSegmentIndex(0) {
105}
106
107class Util {
108public:
109  Util(const MachOLinkingContext &ctxt)
110      : _ctx(ctxt), _archHandler(ctxt.archHandler()), _entryAtom(nullptr),
111        _hasTLVDescriptors(false), _subsectionsViaSymbols(true) {}
112  ~Util();
113
114  void      processDefinedAtoms(const lld::File &atomFile);
115  void      processAtomAttributes(const DefinedAtom *atom);
116  void      assignAtomToSection(const DefinedAtom *atom);
117  void      organizeSections();
118  void      assignAddressesToSections(const NormalizedFile &file);
119  uint32_t  fileFlags();
120  void      copySegmentInfo(NormalizedFile &file);
121  void      copySectionInfo(NormalizedFile &file);
122  void      updateSectionInfo(NormalizedFile &file);
123  void      buildAtomToAddressMap();
124  llvm::Error synthesizeDebugNotes(NormalizedFile &file);
125  llvm::Error addSymbols(const lld::File &atomFile, NormalizedFile &file);
126  void      addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file);
127  void      addRebaseAndBindingInfo(const lld::File &, NormalizedFile &file);
128  void      addExportInfo(const lld::File &, NormalizedFile &file);
129  void      addSectionRelocs(const lld::File &, NormalizedFile &file);
130  void      addFunctionStarts(const lld::File &, NormalizedFile &file);
131  void      buildDataInCodeArray(const lld::File &, NormalizedFile &file);
132  void      addDependentDylibs(const lld::File &, NormalizedFile &file);
133  void      copyEntryPointAddress(NormalizedFile &file);
134  void      copySectionContent(NormalizedFile &file);
135
136  bool allSourceFilesHaveMinVersions() const {
137    return _allSourceFilesHaveMinVersions;
138  }
139
140  uint32_t minVersion() const {
141    return _minVersion;
142  }
143
144  LoadCommandType minVersionCommandType() const {
145    return _minVersionCommandType;
146  }
147
148private:
149  typedef std::map<DefinedAtom::ContentType, SectionInfo*> TypeToSection;
150  typedef llvm::DenseMap<const Atom*, uint64_t> AtomToAddress;
151
152  struct DylibInfo { int ordinal; bool hasWeak; bool hasNonWeak; };
153  typedef llvm::StringMap<DylibInfo> DylibPathToInfo;
154
155  SectionInfo *sectionForAtom(const DefinedAtom*);
156  SectionInfo *getRelocatableSection(DefinedAtom::ContentType type);
157  SectionInfo *getFinalSection(DefinedAtom::ContentType type);
158  void         appendAtom(SectionInfo *sect, const DefinedAtom *atom);
159  SegmentInfo *segmentForName(StringRef segName);
160  void         layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr);
161  void         layoutSectionsInTextSegment(size_t, SegmentInfo *, uint64_t &);
162  void         copySectionContent(SectionInfo *si, ContentBytes &content);
163  uint16_t     descBits(const DefinedAtom* atom);
164  int          dylibOrdinal(const SharedLibraryAtom *sa);
165  void         segIndexForSection(const SectionInfo *sect,
166                             uint8_t &segmentIndex, uint64_t &segmentStartAddr);
167  const Atom  *targetOfLazyPointer(const DefinedAtom *lpAtom);
168  const Atom  *targetOfStub(const DefinedAtom *stubAtom);
169  llvm::Error getSymbolTableRegion(const DefinedAtom* atom,
170                                   bool &inGlobalsRegion,
171                                   SymbolScope &symbolScope);
172  void         appendSection(SectionInfo *si, NormalizedFile &file);
173  uint32_t     sectionIndexForAtom(const Atom *atom);
174  void fixLazyReferenceImm(const DefinedAtom *atom, uint32_t offset,
175                           NormalizedFile &file);
176
177  typedef llvm::DenseMap<const Atom*, uint32_t> AtomToIndex;
178  struct AtomAndIndex { const Atom *atom; uint32_t index; SymbolScope scope; };
179  struct AtomSorter {
180    bool operator()(const AtomAndIndex &left, const AtomAndIndex &right);
181  };
182  struct SegmentSorter {
183    bool operator()(const SegmentInfo *left, const SegmentInfo *right);
184    static unsigned weight(const SegmentInfo *);
185  };
186  struct TextSectionSorter {
187    bool operator()(const SectionInfo *left, const SectionInfo *right);
188    static unsigned weight(const SectionInfo *);
189  };
190
191  const MachOLinkingContext &_ctx;
192  mach_o::ArchHandler          &_archHandler;
193  llvm::BumpPtrAllocator        _allocator;
194  std::vector<SectionInfo*>     _sectionInfos;
195  std::vector<SegmentInfo*>     _segmentInfos;
196  TypeToSection                 _sectionMap;
197  std::vector<SectionInfo*>     _customSections;
198  AtomToAddress                 _atomToAddress;
199  DylibPathToInfo               _dylibInfo;
200  const DefinedAtom            *_entryAtom;
201  AtomToIndex                   _atomToSymbolIndex;
202  std::vector<const Atom *>     _machHeaderAliasAtoms;
203  bool                          _hasTLVDescriptors;
204  bool                          _subsectionsViaSymbols;
205  bool                          _allSourceFilesHaveMinVersions = true;
206  LoadCommandType               _minVersionCommandType = (LoadCommandType)0;
207  uint32_t                      _minVersion = 0;
208  std::vector<lld::mach_o::Stab> _stabs;
209};
210
211Util::~Util() {
212  // The SectionInfo structs are BumpPtr allocated, but atomsAndOffsets needs
213  // to be deleted.
214  for (SectionInfo *si : _sectionInfos) {
215    // clear() destroys vector elements, but does not deallocate.
216    // Instead use swap() to deallocate vector buffer.
217    std::vector<AtomInfo> empty;
218    si->atomsAndOffsets.swap(empty);
219  }
220  // The SegmentInfo structs are BumpPtr allocated, but sections needs
221  // to be deleted.
222  for (SegmentInfo *sgi : _segmentInfos) {
223    std::vector<SectionInfo*> empty2;
224    sgi->sections.swap(empty2);
225  }
226}
227
228SectionInfo *Util::getRelocatableSection(DefinedAtom::ContentType type) {
229  StringRef segmentName;
230  StringRef sectionName;
231  SectionType sectionType;
232  SectionAttr sectionAttrs;
233  bool relocsToDefinedCanBeImplicit;
234
235  // Use same table used by when parsing .o files.
236  relocatableSectionInfoForContentType(type, segmentName, sectionName,
237                                       sectionType, sectionAttrs,
238                                       relocsToDefinedCanBeImplicit);
239  // If we already have a SectionInfo with this name, re-use it.
240  // This can happen if two ContentType map to the same mach-o section.
241  for (auto sect : _sectionMap) {
242    if (sect.second->sectionName.equals(sectionName) &&
243        sect.second->segmentName.equals(segmentName)) {
244      return sect.second;
245    }
246  }
247  // Otherwise allocate new SectionInfo object.
248  auto *sect = new (_allocator)
249      SectionInfo(segmentName, sectionName, sectionType, _ctx, sectionAttrs,
250                  relocsToDefinedCanBeImplicit);
251  _sectionInfos.push_back(sect);
252  _sectionMap[type] = sect;
253  return sect;
254}
255
256#define ENTRY(seg, sect, type, atomType) \
257  {seg, sect, type, DefinedAtom::atomType }
258
259struct MachOFinalSectionFromAtomType {
260  StringRef                 segmentName;
261  StringRef                 sectionName;
262  SectionType               sectionType;
263  DefinedAtom::ContentType  atomType;
264};
265
266const MachOFinalSectionFromAtomType sectsToAtomType[] = {
267  ENTRY("__TEXT", "__text",           S_REGULAR,          typeCode),
268  ENTRY("__TEXT", "__text",           S_REGULAR,          typeMachHeader),
269  ENTRY("__TEXT", "__cstring",        S_CSTRING_LITERALS, typeCString),
270  ENTRY("__TEXT", "__ustring",        S_REGULAR,          typeUTF16String),
271  ENTRY("__TEXT", "__const",          S_REGULAR,          typeConstant),
272  ENTRY("__TEXT", "__const",          S_4BYTE_LITERALS,   typeLiteral4),
273  ENTRY("__TEXT", "__const",          S_8BYTE_LITERALS,   typeLiteral8),
274  ENTRY("__TEXT", "__const",          S_16BYTE_LITERALS,  typeLiteral16),
275  ENTRY("__TEXT", "__stubs",          S_SYMBOL_STUBS,     typeStub),
276  ENTRY("__TEXT", "__stub_helper",    S_REGULAR,          typeStubHelper),
277  ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR,          typeLSDA),
278  ENTRY("__TEXT", "__eh_frame",       S_COALESCED,        typeCFI),
279  ENTRY("__TEXT", "__unwind_info",    S_REGULAR,          typeProcessedUnwindInfo),
280  ENTRY("__DATA", "__data",           S_REGULAR,          typeData),
281  ENTRY("__DATA", "__const",          S_REGULAR,          typeConstData),
282  ENTRY("__DATA", "__cfstring",       S_REGULAR,          typeCFString),
283  ENTRY("__DATA", "__la_symbol_ptr",  S_LAZY_SYMBOL_POINTERS,
284                                                          typeLazyPointer),
285  ENTRY("__DATA", "__mod_init_func",  S_MOD_INIT_FUNC_POINTERS,
286                                                          typeInitializerPtr),
287  ENTRY("__DATA", "__mod_term_func",  S_MOD_TERM_FUNC_POINTERS,
288                                                          typeTerminatorPtr),
289  ENTRY("__DATA", "__got",            S_NON_LAZY_SYMBOL_POINTERS,
290                                                          typeGOT),
291  ENTRY("__DATA", "__nl_symbol_ptr",  S_NON_LAZY_SYMBOL_POINTERS,
292                                                          typeNonLazyPointer),
293  ENTRY("__DATA", "__thread_vars",    S_THREAD_LOCAL_VARIABLES,
294                                                          typeThunkTLV),
295  ENTRY("__DATA", "__thread_data",    S_THREAD_LOCAL_REGULAR,
296                                                          typeTLVInitialData),
297  ENTRY("__DATA", "__thread_ptrs",    S_THREAD_LOCAL_VARIABLE_POINTERS,
298                                                          typeTLVInitializerPtr),
299  ENTRY("__DATA", "__thread_bss",     S_THREAD_LOCAL_ZEROFILL,
300                                                         typeTLVInitialZeroFill),
301  ENTRY("__DATA", "__bss",            S_ZEROFILL,         typeZeroFill),
302  ENTRY("__DATA", "__interposing",    S_INTERPOSING,      typeInterposingTuples),
303};
304#undef ENTRY
305
306SectionInfo *Util::getFinalSection(DefinedAtom::ContentType atomType) {
307  for (auto &p : sectsToAtomType) {
308    if (p.atomType != atomType)
309      continue;
310    SectionAttr sectionAttrs = 0;
311    switch (atomType) {
312    case DefinedAtom::typeMachHeader:
313    case DefinedAtom::typeCode:
314    case DefinedAtom::typeStub:
315    case DefinedAtom::typeStubHelper:
316      sectionAttrs = S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS;
317      break;
318    case DefinedAtom::typeThunkTLV:
319      _hasTLVDescriptors = true;
320      break;
321    default:
322      break;
323    }
324    // If we already have a SectionInfo with this name, re-use it.
325    // This can happen if two ContentType map to the same mach-o section.
326    for (auto sect : _sectionMap) {
327      if (sect.second->sectionName.equals(p.sectionName) &&
328          sect.second->segmentName.equals(p.segmentName)) {
329        return sect.second;
330      }
331    }
332    // Otherwise allocate new SectionInfo object.
333    auto *sect = new (_allocator) SectionInfo(
334        p.segmentName, p.sectionName, p.sectionType, _ctx, sectionAttrs,
335        /* relocsToDefinedCanBeImplicit */ false);
336    _sectionInfos.push_back(sect);
337    _sectionMap[atomType] = sect;
338    return sect;
339  }
340  llvm_unreachable("content type not yet supported");
341}
342
343SectionInfo *Util::sectionForAtom(const DefinedAtom *atom) {
344  if (atom->sectionChoice() == DefinedAtom::sectionBasedOnContent) {
345    // Section for this atom is derived from content type.
346    DefinedAtom::ContentType type = atom->contentType();
347    auto pos = _sectionMap.find(type);
348    if ( pos != _sectionMap.end() )
349      return pos->second;
350    bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT);
351    return rMode ? getRelocatableSection(type) : getFinalSection(type);
352  } else {
353    // This atom needs to be in a custom section.
354    StringRef customName = atom->customSectionName();
355    // Look to see if we have already allocated the needed custom section.
356    for(SectionInfo *sect : _customSections) {
357      const DefinedAtom *firstAtom = sect->atomsAndOffsets.front().atom;
358      if (firstAtom->customSectionName().equals(customName)) {
359        return sect;
360      }
361    }
362    // Not found, so need to create a new custom section.
363    size_t seperatorIndex = customName.find('/');
364    assert(seperatorIndex != StringRef::npos);
365    StringRef segName = customName.slice(0, seperatorIndex);
366    StringRef sectName = customName.drop_front(seperatorIndex + 1);
367    auto *sect =
368        new (_allocator) SectionInfo(segName, sectName, S_REGULAR, _ctx,
369                                     0, /* relocsToDefinedCanBeImplicit */ false);
370    _customSections.push_back(sect);
371    _sectionInfos.push_back(sect);
372    return sect;
373  }
374}
375
376void Util::appendAtom(SectionInfo *sect, const DefinedAtom *atom) {
377  // Figure out offset for atom in this section given alignment constraints.
378  uint64_t offset = sect->size;
379  DefinedAtom::Alignment atomAlign = atom->alignment();
380  uint64_t align = atomAlign.value;
381  uint64_t requiredModulus = atomAlign.modulus;
382  uint64_t currentModulus = (offset % align);
383  if ( currentModulus != requiredModulus ) {
384    if ( requiredModulus > currentModulus )
385      offset += requiredModulus-currentModulus;
386    else
387      offset += align+requiredModulus-currentModulus;
388  }
389  // Record max alignment of any atom in this section.
390  if (align > sect->alignment)
391    sect->alignment = atomAlign.value;
392  // Assign atom to this section with this offset.
393  AtomInfo ai = {atom, offset};
394  sect->atomsAndOffsets.push_back(ai);
395  // Update section size to include this atom.
396  sect->size = offset + atom->size();
397}
398
399void Util::processDefinedAtoms(const lld::File &atomFile) {
400  for (const DefinedAtom *atom : atomFile.defined()) {
401    processAtomAttributes(atom);
402    assignAtomToSection(atom);
403  }
404}
405
406void Util::processAtomAttributes(const DefinedAtom *atom) {
407  if (auto *machoFile = dyn_cast<mach_o::MachOFile>(&atom->file())) {
408    // If the file doesn't use subsections via symbols, then make sure we don't
409    // add that flag to the final output file if we have a relocatable file.
410    if (!machoFile->subsectionsViaSymbols())
411      _subsectionsViaSymbols = false;
412
413    // All the source files must have min versions for us to output an object
414    // file with a min version.
415    if (auto v = machoFile->minVersion())
416      _minVersion = std::max(_minVersion, v);
417    else
418      _allSourceFilesHaveMinVersions = false;
419
420    // If we don't have a platform load command, but one of the source files
421    // does, then take the one from the file.
422    if (!_minVersionCommandType)
423      if (auto v = machoFile->minVersionLoadCommandKind())
424        _minVersionCommandType = v;
425  }
426}
427
428void Util::assignAtomToSection(const DefinedAtom *atom) {
429  if (atom->contentType() == DefinedAtom::typeMachHeader) {
430    _machHeaderAliasAtoms.push_back(atom);
431    // Assign atom to this section with this offset.
432    AtomInfo ai = {atom, 0};
433    sectionForAtom(atom)->atomsAndOffsets.push_back(ai);
434  } else if (atom->contentType() == DefinedAtom::typeDSOHandle)
435    _machHeaderAliasAtoms.push_back(atom);
436  else
437    appendAtom(sectionForAtom(atom), atom);
438}
439
440SegmentInfo *Util::segmentForName(StringRef segName) {
441  for (SegmentInfo *si : _segmentInfos) {
442    if ( si->name.equals(segName) )
443      return si;
444  }
445  auto *info = new (_allocator) SegmentInfo(segName);
446
447  // Set the initial segment protection.
448  if (segName.equals("__TEXT"))
449    info->init_access = VM_PROT_READ | VM_PROT_EXECUTE;
450  else if (segName.equals("__PAGEZERO"))
451    info->init_access = 0;
452  else if (segName.equals("__LINKEDIT"))
453    info->init_access = VM_PROT_READ;
454  else {
455    // All others default to read-write
456    info->init_access = VM_PROT_READ | VM_PROT_WRITE;
457  }
458
459  // Set max segment protection
460  // Note, its overkill to use a switch statement here, but makes it so much
461  // easier to use switch coverage to catch new cases.
462  switch (_ctx.os()) {
463    case lld::MachOLinkingContext::OS::unknown:
464    case lld::MachOLinkingContext::OS::macOSX:
465    case lld::MachOLinkingContext::OS::iOS_simulator:
466      if (segName.equals("__PAGEZERO")) {
467        info->max_access = 0;
468        break;
469      }
470      // All others default to all
471      info->max_access = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
472      break;
473    case lld::MachOLinkingContext::OS::iOS:
474      // iPhoneOS always uses same protection for max and initial
475      info->max_access = info->init_access;
476      break;
477  }
478  _segmentInfos.push_back(info);
479  return info;
480}
481
482unsigned Util::SegmentSorter::weight(const SegmentInfo *seg) {
483 return llvm::StringSwitch<unsigned>(seg->name)
484    .Case("__PAGEZERO",  1)
485    .Case("__TEXT",      2)
486    .Case("__DATA",      3)
487    .Default(100);
488}
489
490bool Util::SegmentSorter::operator()(const SegmentInfo *left,
491                                  const SegmentInfo *right) {
492  return (weight(left) < weight(right));
493}
494
495unsigned Util::TextSectionSorter::weight(const SectionInfo *sect) {
496 return llvm::StringSwitch<unsigned>(sect->sectionName)
497    .Case("__text",         1)
498    .Case("__stubs",        2)
499    .Case("__stub_helper",  3)
500    .Case("__const",        4)
501    .Case("__cstring",      5)
502    .Case("__unwind_info",  98)
503    .Case("__eh_frame",     99)
504    .Default(10);
505}
506
507bool Util::TextSectionSorter::operator()(const SectionInfo *left,
508                                         const SectionInfo *right) {
509  return (weight(left) < weight(right));
510}
511
512void Util::organizeSections() {
513  // NOTE!: Keep this in sync with assignAddressesToSections.
514  switch (_ctx.outputMachOType()) {
515    case llvm::MachO::MH_EXECUTE:
516      // Main executables, need a zero-page segment
517      segmentForName("__PAGEZERO");
518      // Fall into next case.
519      LLVM_FALLTHROUGH;
520    case llvm::MachO::MH_DYLIB:
521    case llvm::MachO::MH_BUNDLE:
522      // All dynamic code needs TEXT segment to hold the load commands.
523      segmentForName("__TEXT");
524      break;
525    default:
526      break;
527  }
528  segmentForName("__LINKEDIT");
529
530  // Group sections into segments.
531  for (SectionInfo *si : _sectionInfos) {
532    SegmentInfo *seg = segmentForName(si->segmentName);
533    seg->sections.push_back(si);
534  }
535  // Sort segments.
536  std::sort(_segmentInfos.begin(), _segmentInfos.end(), SegmentSorter());
537
538  // Sort sections within segments.
539  for (SegmentInfo *seg : _segmentInfos) {
540    if (seg->name.equals("__TEXT")) {
541      std::sort(seg->sections.begin(), seg->sections.end(),
542                TextSectionSorter());
543    }
544  }
545
546  // Record final section indexes.
547  uint32_t segmentIndex = 0;
548  uint32_t sectionIndex = 1;
549  for (SegmentInfo *seg : _segmentInfos) {
550    seg->normalizedSegmentIndex = segmentIndex++;
551    for (SectionInfo *sect : seg->sections)
552      sect->finalSectionIndex = sectionIndex++;
553  }
554}
555
556void Util::layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr) {
557  seg->address = addr;
558  for (SectionInfo *sect : seg->sections) {
559    sect->address = llvm::alignTo(addr, sect->alignment);
560    addr = sect->address + sect->size;
561  }
562  seg->size = llvm::alignTo(addr - seg->address, _ctx.pageSize());
563}
564
// __TEXT segment lays out backwards so padding is at front after load commands.
//
// hlcSize is the number of bytes reserved at the start of the segment before
// the first section (the caller passes the header-and-load-commands size).
// On entry addr is the segment start address; on return it is the end of the
// last section.  The segment size is rounded up to the page size.
void Util::layoutSectionsInTextSegment(size_t hlcSize, SegmentInfo *seg,
                                                               uint64_t &addr) {
  seg->address = addr;
  // Walks sections starting at end to calculate padding for start.
  // taddr runs downward from 0 (it is zero or negative): each step subtracts
  // the section size and then masks with (0 - alignment), which rounds down to
  // a multiple of the alignment -- assumes alignments are powers of two.
  int64_t taddr = 0;
  for (auto it = seg->sections.rbegin(); it != seg->sections.rend(); ++it) {
    SectionInfo *sect = *it;
    taddr -= sect->size;
    taddr = taddr & (0 - sect->alignment);
  }
  // Make room for the header bytes in front of the first section, then add
  // whole pages until the padding is non-negative.
  int64_t padding = taddr - hlcSize;
  while (padding < 0)
    padding += _ctx.pageSize();
  // Start assigning section address starting at padded offset.
  addr += (padding + hlcSize);
  for (SectionInfo *sect : seg->sections) {
    sect->address = llvm::alignTo(addr, sect->alignment);
    addr = sect->address + sect->size;
  }
  seg->size = llvm::alignTo(addr - seg->address, _ctx.pageSize());
}
587
588void Util::assignAddressesToSections(const NormalizedFile &file) {
589  // NOTE!: Keep this in sync with organizeSections.
590  size_t hlcSize = headerAndLoadCommandsSize(file,
591                                      _ctx.generateFunctionStartsLoadCommand());
592  uint64_t address = 0;
593  for (SegmentInfo *seg : _segmentInfos) {
594    if (seg->name.equals("__PAGEZERO")) {
595      seg->size = _ctx.pageZeroSize();
596      address += seg->size;
597    }
598    else if (seg->name.equals("__TEXT")) {
599      // _ctx.baseAddress()  == 0 implies it was either unspecified or
600      // pageZeroSize is also 0. In either case resetting address is safe.
601      address = _ctx.baseAddress() ? _ctx.baseAddress() : address;
602      layoutSectionsInTextSegment(hlcSize, seg, address);
603    } else
604      layoutSectionsInSegment(seg, address);
605
606    address = llvm::alignTo(address, _ctx.pageSize());
607  }
608  DEBUG_WITH_TYPE("WriterMachO-norm",
609    llvm::dbgs() << "assignAddressesToSections()\n";
610    for (SegmentInfo *sgi : _segmentInfos) {
611      llvm::dbgs()  << "   address=" << llvm::format("0x%08llX", sgi->address)
612                    << ", size="  << llvm::format("0x%08llX", sgi->size)
613                    << ", segment-name='" << sgi->name
614                    << "'\n";
615      for (SectionInfo *si : sgi->sections) {
616        llvm::dbgs()<< "      addr="  << llvm::format("0x%08llX", si->address)
617                    << ", size="  << llvm::format("0x%08llX", si->size)
618                    << ", section-name='" << si->sectionName
619                    << "\n";
620      }
621    }
622  );
623}
624
625void Util::copySegmentInfo(NormalizedFile &file) {
626  for (SegmentInfo *sgi : _segmentInfos) {
627    Segment seg;
628    seg.name    = sgi->name;
629    seg.address = sgi->address;
630    seg.size    = sgi->size;
631    seg.init_access  = sgi->init_access;
632    seg.max_access  = sgi->max_access;
633    file.segments.push_back(seg);
634  }
635}
636
637void Util::appendSection(SectionInfo *si, NormalizedFile &file) {
638   // Add new empty section to end of file.sections.
639  Section temp;
640  file.sections.push_back(std::move(temp));
641  Section* normSect = &file.sections.back();
642  // Copy fields to normalized section.
643  normSect->segmentName   = si->segmentName;
644  normSect->sectionName   = si->sectionName;
645  normSect->type          = si->type;
646  normSect->attributes    = si->attributes;
647  normSect->address       = si->address;
648  normSect->alignment     = si->alignment;
649  // Record where normalized section is.
650  si->normalizedSectionIndex = file.sections.size()-1;
651}
652
653void Util::copySectionContent(NormalizedFile &file) {
654  const bool r = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT);
655
656  // Utility function for ArchHandler to find address of atom in output file.
657  auto addrForAtom = [&] (const Atom &atom) -> uint64_t {
658    auto pos = _atomToAddress.find(&atom);
659    assert(pos != _atomToAddress.end());
660    return pos->second;
661  };
662
663  auto sectionAddrForAtom = [&] (const Atom &atom) -> uint64_t {
664    for (const SectionInfo *sectInfo : _sectionInfos)
665      for (const AtomInfo &atomInfo : sectInfo->atomsAndOffsets)
666        if (atomInfo.atom == &atom)
667          return sectInfo->address;
668    llvm_unreachable("atom not assigned to section");
669  };
670
671  for (SectionInfo *si : _sectionInfos) {
672    Section *normSect = &file.sections[si->normalizedSectionIndex];
673    if (isZeroFillSection(si->type)) {
674      const uint8_t *empty = nullptr;
675      normSect->content = llvm::makeArrayRef(empty, si->size);
676      continue;
677    }
678    // Copy content from atoms to content buffer for section.
679    llvm::MutableArrayRef<uint8_t> sectionContent;
680    if (si->size) {
681      uint8_t *sectContent = file.ownedAllocations.Allocate<uint8_t>(si->size);
682      sectionContent = llvm::MutableArrayRef<uint8_t>(sectContent, si->size);
683      normSect->content = sectionContent;
684    }
685    for (AtomInfo &ai : si->atomsAndOffsets) {
686      if (!ai.atom->size()) {
687        assert(ai.atom->begin() == ai.atom->end() &&
688               "Cannot have references without content");
689        continue;
690      }
691      auto atomContent = sectionContent.slice(ai.offsetInSection,
692                                              ai.atom->size());
693      _archHandler.generateAtomContent(*ai.atom, r, addrForAtom,
694                                       sectionAddrForAtom, _ctx.baseAddress(),
695                                       atomContent);
696    }
697  }
698}
699
700void Util::copySectionInfo(NormalizedFile &file) {
701  file.sections.reserve(_sectionInfos.size());
702  // Write sections grouped by segment.
703  for (SegmentInfo *sgi : _segmentInfos) {
704    for (SectionInfo *si : sgi->sections) {
705      appendSection(si, file);
706    }
707  }
708}
709
710void Util::updateSectionInfo(NormalizedFile &file) {
711  file.sections.reserve(_sectionInfos.size());
712  // sections grouped by segment.
713  for (SegmentInfo *sgi : _segmentInfos) {
714    Segment *normSeg = &file.segments[sgi->normalizedSegmentIndex];
715    normSeg->address = sgi->address;
716    normSeg->size = sgi->size;
717    for (SectionInfo *si : sgi->sections) {
718      Section *normSect = &file.sections[si->normalizedSectionIndex];
719      normSect->address = si->address;
720    }
721  }
722}
723
724void Util::copyEntryPointAddress(NormalizedFile &nFile) {
725  if (!_entryAtom) {
726    nFile.entryAddress = 0;
727    return;
728  }
729
730  if (_ctx.outputTypeHasEntry()) {
731    if (_archHandler.isThumbFunction(*_entryAtom))
732      nFile.entryAddress = (_atomToAddress[_entryAtom] | 1);
733    else
734      nFile.entryAddress = _atomToAddress[_entryAtom];
735  }
736}
737
738void Util::buildAtomToAddressMap() {
739  DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
740                   << "assign atom addresses:\n");
741  const bool lookForEntry = _ctx.outputTypeHasEntry();
742  for (SectionInfo *sect : _sectionInfos) {
743    for (const AtomInfo &info : sect->atomsAndOffsets) {
744      _atomToAddress[info.atom] = sect->address + info.offsetInSection;
745      if (lookForEntry && (info.atom->contentType() == DefinedAtom::typeCode) &&
746          (info.atom->size() != 0) &&
747          info.atom->name() == _ctx.entrySymbolName()) {
748        _entryAtom = info.atom;
749      }
750      DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
751                      << "   address="
752                      << llvm::format("0x%016X", _atomToAddress[info.atom])
753                      << llvm::format("    0x%09lX", info.atom)
754                      << ", file=#"
755                      << info.atom->file().ordinal()
756                      << ", atom=#"
757                      << info.atom->ordinal()
758                      << ", name="
759                      << info.atom->name()
760                      << ", type="
761                      << info.atom->contentType()
762                      << "\n");
763    }
764  }
765  DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
766                  << "assign header alias atom addresses:\n");
767  for (const Atom *atom : _machHeaderAliasAtoms) {
768    _atomToAddress[atom] = _ctx.baseAddress();
769#ifndef NDEBUG
770    if (auto *definedAtom = dyn_cast<DefinedAtom>(atom)) {
771      DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
772                      << "   address="
773                      << llvm::format("0x%016X", _atomToAddress[atom])
774                      << llvm::format("    0x%09lX", atom)
775                      << ", file=#"
776                      << definedAtom->file().ordinal()
777                      << ", atom=#"
778                      << definedAtom->ordinal()
779                      << ", name="
780                      << definedAtom->name()
781                      << ", type="
782                      << definedAtom->contentType()
783                      << "\n");
784    } else {
785      DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
786                      << "   address="
787                      << llvm::format("0x%016X", _atomToAddress[atom])
788                      << " atom=" << atom
789                      << " name=" << atom->name() << "\n");
790    }
791#endif
792  }
793}
794
795llvm::Error Util::synthesizeDebugNotes(NormalizedFile &file) {
796
797  // Bail out early if we don't need to generate a debug map.
798  if (_ctx.debugInfoMode() == MachOLinkingContext::DebugInfoMode::noDebugMap)
799    return llvm::Error::success();
800
801  std::vector<const DefinedAtom*> atomsNeedingDebugNotes;
802  std::set<const mach_o::MachOFile*> filesWithStabs;
803  bool objFileHasDwarf = false;
804  const File *objFile = nullptr;
805
806  for (SectionInfo *sect : _sectionInfos) {
807    for (const AtomInfo &info : sect->atomsAndOffsets) {
808      if (const DefinedAtom *atom = dyn_cast<DefinedAtom>(info.atom)) {
809
810        // FIXME: No stabs/debug-notes for symbols that wouldn't be in the
811        //        symbol table.
812        // FIXME: No stabs/debug-notes for kernel dtrace probes.
813
814        if (atom->contentType() == DefinedAtom::typeCFI ||
815            atom->contentType() == DefinedAtom::typeCString)
816          continue;
817
818        // Whenever we encounter a new file, update the 'objfileHasDwarf' flag.
819        if (&info.atom->file() != objFile) {
820          objFileHasDwarf = false;
821          if (const mach_o::MachOFile *atomFile =
822              dyn_cast<mach_o::MachOFile>(&info.atom->file())) {
823            if (atomFile->debugInfo()) {
824              if (isa<mach_o::DwarfDebugInfo>(atomFile->debugInfo()))
825                objFileHasDwarf = true;
826              else if (isa<mach_o::StabsDebugInfo>(atomFile->debugInfo()))
827                filesWithStabs.insert(atomFile);
828            }
829          }
830        }
831
832        // If this atom is from a file that needs dwarf, add it to the list.
833        if (objFileHasDwarf)
834          atomsNeedingDebugNotes.push_back(info.atom);
835      }
836    }
837  }
838
839  // Sort atoms needing debug notes by file ordinal, then atom ordinal.
840  std::sort(atomsNeedingDebugNotes.begin(), atomsNeedingDebugNotes.end(),
841            [](const DefinedAtom *lhs, const DefinedAtom *rhs) {
842              if (lhs->file().ordinal() != rhs->file().ordinal())
843                return (lhs->file().ordinal() < rhs->file().ordinal());
844              return (lhs->ordinal() < rhs->ordinal());
845            });
846
847  // FIXME: Handle <rdar://problem/17689030>: Add -add_ast_path option to \
848  //        linker which add N_AST stab entry to output
849  // See OutputFile::synthesizeDebugNotes in ObjectFile.cpp in ld64.
850
851  StringRef oldFileName = "";
852  StringRef oldDirPath = "";
853  bool wroteStartSO = false;
854  std::unordered_set<std::string> seenFiles;
855  for (const DefinedAtom *atom : atomsNeedingDebugNotes) {
856    const auto &atomFile = cast<mach_o::MachOFile>(atom->file());
857    assert(dyn_cast_or_null<lld::mach_o::DwarfDebugInfo>(atomFile.debugInfo())
858           && "file for atom needing debug notes does not contain dwarf");
859    auto &dwarf = cast<lld::mach_o::DwarfDebugInfo>(*atomFile.debugInfo());
860
861    auto &tu = dwarf.translationUnitSource();
862    StringRef newFileName = tu.name;
863    StringRef newDirPath = tu.path;
864
865    // Add an SO whenever the TU source file changes.
866    if (newFileName != oldFileName || newDirPath != oldDirPath) {
867      // Translation unit change, emit ending SO
868      if (oldFileName != "")
869        _stabs.push_back(mach_o::Stab(nullptr, N_SO, 1, 0, 0, ""));
870
871      oldFileName = newFileName;
872      oldDirPath = newDirPath;
873
874      // If newDirPath doesn't end with a '/' we need to add one:
875      if (newDirPath.back() != '/') {
876        char *p =
877          file.ownedAllocations.Allocate<char>(newDirPath.size() + 2);
878        memcpy(p, newDirPath.data(), newDirPath.size());
879        p[newDirPath.size()] = '/';
880        p[newDirPath.size() + 1] = '\0';
881        newDirPath = p;
882      }
883
884      // New translation unit, emit start SOs:
885      _stabs.push_back(mach_o::Stab(nullptr, N_SO, 0, 0, 0, newDirPath));
886      _stabs.push_back(mach_o::Stab(nullptr, N_SO, 0, 0, 0, newFileName));
887
888      // Synthesize OSO for start of file.
889      char *fullPath = nullptr;
890      {
891        SmallString<1024> pathBuf(atomFile.path());
892        if (auto EC = llvm::sys::fs::make_absolute(pathBuf))
893          return llvm::errorCodeToError(EC);
894        fullPath = file.ownedAllocations.Allocate<char>(pathBuf.size() + 1);
895        memcpy(fullPath, pathBuf.c_str(), pathBuf.size() + 1);
896      }
897
898      // Get mod time.
899      uint32_t modTime = 0;
900      llvm::sys::fs::file_status stat;
901      if (!llvm::sys::fs::status(fullPath, stat))
902        if (llvm::sys::fs::exists(stat))
903          modTime = llvm::sys::toTimeT(stat.getLastModificationTime());
904
905      _stabs.push_back(mach_o::Stab(nullptr, N_OSO, _ctx.getCPUSubType(), 1,
906                                    modTime, fullPath));
907      // <rdar://problem/6337329> linker should put cpusubtype in n_sect field
908      // of nlist entry for N_OSO debug note entries.
909      wroteStartSO = true;
910    }
911
912    if (atom->contentType() == DefinedAtom::typeCode) {
913      // Synthesize BNSYM and start FUN stabs.
914      _stabs.push_back(mach_o::Stab(atom, N_BNSYM, 1, 0, 0, ""));
915      _stabs.push_back(mach_o::Stab(atom, N_FUN, 1, 0, 0, atom->name()));
916      // Synthesize any SOL stabs needed
917      // FIXME: add SOL stabs.
918      _stabs.push_back(mach_o::Stab(nullptr, N_FUN, 0, 0,
919                                    atom->rawContent().size(), ""));
920      _stabs.push_back(mach_o::Stab(nullptr, N_ENSYM, 1, 0,
921                                    atom->rawContent().size(), ""));
922    } else {
923      if (atom->scope() == Atom::scopeTranslationUnit)
924        _stabs.push_back(mach_o::Stab(atom, N_STSYM, 1, 0, 0, atom->name()));
925      else
926        _stabs.push_back(mach_o::Stab(nullptr, N_GSYM, 1, 0, 0, atom->name()));
927    }
928  }
929
930  // Emit ending SO if necessary.
931  if (wroteStartSO)
932    _stabs.push_back(mach_o::Stab(nullptr, N_SO, 1, 0, 0, ""));
933
934  // Copy any stabs from .o file.
935  for (const auto *objFile : filesWithStabs) {
936    const auto &stabsList =
937      cast<mach_o::StabsDebugInfo>(objFile->debugInfo())->stabs();
938    for (auto &stab : stabsList) {
939      // FIXME: Drop stabs whose atoms have been dead-stripped.
940      _stabs.push_back(stab);
941    }
942  }
943
944  return llvm::Error::success();
945}
946
947uint16_t Util::descBits(const DefinedAtom* atom) {
948  uint16_t desc = 0;
949  switch (atom->merge()) {
950  case lld::DefinedAtom::mergeNo:
951  case lld::DefinedAtom::mergeAsTentative:
952    break;
953  case lld::DefinedAtom::mergeAsWeak:
954  case lld::DefinedAtom::mergeAsWeakAndAddressUsed:
955    desc |= N_WEAK_DEF;
956    break;
957  case lld::DefinedAtom::mergeSameNameAndSize:
958  case lld::DefinedAtom::mergeByLargestSection:
959  case lld::DefinedAtom::mergeByContent:
960    llvm_unreachable("Unsupported DefinedAtom::merge()");
961    break;
962  }
963  if (atom->contentType() == lld::DefinedAtom::typeResolver)
964    desc |= N_SYMBOL_RESOLVER;
965  if (atom->contentType() == lld::DefinedAtom::typeMachHeader)
966    desc |= REFERENCED_DYNAMICALLY;
967  if (_archHandler.isThumbFunction(*atom))
968    desc |= N_ARM_THUMB_DEF;
969  if (atom->deadStrip() == DefinedAtom::deadStripNever &&
970      _ctx.outputMachOType() == llvm::MachO::MH_OBJECT) {
971    if ((atom->contentType() != DefinedAtom::typeInitializerPtr)
972     && (atom->contentType() != DefinedAtom::typeTerminatorPtr))
973    desc |= N_NO_DEAD_STRIP;
974  }
975  return desc;
976}
977
978bool Util::AtomSorter::operator()(const AtomAndIndex &left,
979                                  const AtomAndIndex &right) {
980  return (left.atom->name().compare(right.atom->name()) < 0);
981}
982
983llvm::Error Util::getSymbolTableRegion(const DefinedAtom* atom,
984                                       bool &inGlobalsRegion,
985                                       SymbolScope &scope) {
986  bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT);
987  switch (atom->scope()) {
988  case Atom::scopeTranslationUnit:
989    scope = 0;
990    inGlobalsRegion = false;
991    return llvm::Error::success();
992  case Atom::scopeLinkageUnit:
993    if ((_ctx.exportMode() == MachOLinkingContext::ExportMode::whiteList) &&
994        _ctx.exportSymbolNamed(atom->name())) {
995      return llvm::make_error<GenericError>(
996                          Twine("cannot export hidden symbol ") + atom->name());
997    }
998    if (rMode) {
999      if (_ctx.keepPrivateExterns()) {
1000        // -keep_private_externs means keep in globals region as N_PEXT.
1001        scope = N_PEXT | N_EXT;
1002        inGlobalsRegion = true;
1003        return llvm::Error::success();
1004      }
1005    }
1006    // scopeLinkageUnit symbols are no longer global once linked.
1007    scope = N_PEXT;
1008    inGlobalsRegion = false;
1009    return llvm::Error::success();
1010  case Atom::scopeGlobal:
1011    if (_ctx.exportRestrictMode()) {
1012      if (_ctx.exportSymbolNamed(atom->name())) {
1013        scope = N_EXT;
1014        inGlobalsRegion = true;
1015        return llvm::Error::success();
1016      } else {
1017        scope = N_PEXT;
1018        inGlobalsRegion = false;
1019        return llvm::Error::success();
1020      }
1021    } else {
1022      scope = N_EXT;
1023      inGlobalsRegion = true;
1024      return llvm::Error::success();
1025    }
1026    break;
1027  }
1028  llvm_unreachable("atom->scope() unknown enum value");
1029}
1030
1031
1032
1033llvm::Error Util::addSymbols(const lld::File &atomFile,
1034                             NormalizedFile &file) {
1035  bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT);
1036  // Mach-O symbol table has four regions: stabs, locals, globals, undefs.
1037
1038  // Add all stabs.
1039  for (auto &stab : _stabs) {
1040    Symbol sym;
1041    sym.type = static_cast<NListType>(stab.type);
1042    sym.scope = 0;
1043    sym.sect = stab.other;
1044    sym.desc = stab.desc;
1045    if (stab.atom)
1046      sym.value = _atomToAddress[stab.atom];
1047    else
1048      sym.value = stab.value;
1049    sym.name = stab.str;
1050    file.stabsSymbols.push_back(sym);
1051  }
1052
1053  // Add all local (non-global) symbols in address order
1054  std::vector<AtomAndIndex> globals;
1055  globals.reserve(512);
1056  for (SectionInfo *sect : _sectionInfos) {
1057    for (const AtomInfo &info : sect->atomsAndOffsets) {
1058      const DefinedAtom *atom = info.atom;
1059      if (!atom->name().empty()) {
1060        SymbolScope symbolScope;
1061        bool inGlobalsRegion;
1062        if (auto ec = getSymbolTableRegion(atom, inGlobalsRegion, symbolScope)){
1063          return ec;
1064        }
1065        if (inGlobalsRegion) {
1066          AtomAndIndex ai = { atom, sect->finalSectionIndex, symbolScope };
1067          globals.push_back(ai);
1068        } else {
1069          Symbol sym;
1070          sym.name  = atom->name();
1071          sym.type  = N_SECT;
1072          sym.scope = symbolScope;
1073          sym.sect  = sect->finalSectionIndex;
1074          sym.desc  = descBits(atom);
1075          sym.value = _atomToAddress[atom];
1076          _atomToSymbolIndex[atom] = file.localSymbols.size();
1077          file.localSymbols.push_back(sym);
1078        }
1079      } else if (rMode && _archHandler.needsLocalSymbolInRelocatableFile(atom)){
1080        // Create 'Lxxx' labels for anonymous atoms if archHandler says so.
1081        static unsigned tempNum = 1;
1082        char tmpName[16];
1083        sprintf(tmpName, "L%04u", tempNum++);
1084        StringRef tempRef(tmpName);
1085        Symbol sym;
1086        sym.name  = tempRef.copy(file.ownedAllocations);
1087        sym.type  = N_SECT;
1088        sym.scope = 0;
1089        sym.sect  = sect->finalSectionIndex;
1090        sym.desc  = 0;
1091        sym.value = _atomToAddress[atom];
1092        _atomToSymbolIndex[atom] = file.localSymbols.size();
1093        file.localSymbols.push_back(sym);
1094      }
1095    }
1096  }
1097
1098  // Sort global symbol alphabetically, then add to symbol table.
1099  std::sort(globals.begin(), globals.end(), AtomSorter());
1100  const uint32_t globalStartIndex = file.localSymbols.size();
1101  for (AtomAndIndex &ai : globals) {
1102    Symbol sym;
1103    sym.name  = ai.atom->name();
1104    sym.type  = N_SECT;
1105    sym.scope = ai.scope;
1106    sym.sect  = ai.index;
1107    sym.desc  = descBits(static_cast<const DefinedAtom*>(ai.atom));
1108    sym.value = _atomToAddress[ai.atom];
1109    _atomToSymbolIndex[ai.atom] = globalStartIndex + file.globalSymbols.size();
1110    file.globalSymbols.push_back(sym);
1111  }
1112
1113  // Sort undefined symbol alphabetically, then add to symbol table.
1114  std::vector<AtomAndIndex> undefs;
1115  undefs.reserve(128);
1116  for (const UndefinedAtom *atom : atomFile.undefined()) {
1117    AtomAndIndex ai = { atom, 0, N_EXT };
1118    undefs.push_back(ai);
1119  }
1120  for (const SharedLibraryAtom *atom : atomFile.sharedLibrary()) {
1121    AtomAndIndex ai = { atom, 0, N_EXT };
1122    undefs.push_back(ai);
1123  }
1124  std::sort(undefs.begin(), undefs.end(), AtomSorter());
1125  const uint32_t start = file.globalSymbols.size() + file.localSymbols.size();
1126  for (AtomAndIndex &ai : undefs) {
1127    Symbol sym;
1128    uint16_t desc = 0;
1129    if (!rMode) {
1130      uint8_t ordinal = 0;
1131      if (!_ctx.useFlatNamespace())
1132        ordinal = dylibOrdinal(dyn_cast<SharedLibraryAtom>(ai.atom));
1133      llvm::MachO::SET_LIBRARY_ORDINAL(desc, ordinal);
1134    }
1135    sym.name  = ai.atom->name();
1136    sym.type  = N_UNDF;
1137    sym.scope = ai.scope;
1138    sym.sect  = 0;
1139    sym.desc  = desc;
1140    sym.value = 0;
1141    _atomToSymbolIndex[ai.atom] = file.undefinedSymbols.size() + start;
1142    file.undefinedSymbols.push_back(sym);
1143  }
1144
1145  return llvm::Error::success();
1146}
1147
1148const Atom *Util::targetOfLazyPointer(const DefinedAtom *lpAtom) {
1149  for (const Reference *ref : *lpAtom) {
1150    if (_archHandler.isLazyPointer(*ref)) {
1151      return ref->target();
1152    }
1153  }
1154  return nullptr;
1155}
1156
1157const Atom *Util::targetOfStub(const DefinedAtom *stubAtom) {
1158  for (const Reference *ref : *stubAtom) {
1159    if (const Atom *ta = ref->target()) {
1160      if (const DefinedAtom *lpAtom = dyn_cast<DefinedAtom>(ta)) {
1161        const Atom *target = targetOfLazyPointer(lpAtom);
1162        if (target)
1163          return target;
1164      }
1165    }
1166  }
1167  return nullptr;
1168}
1169
1170void Util::addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file) {
1171  for (SectionInfo *si : _sectionInfos) {
1172    Section &normSect = file.sections[si->normalizedSectionIndex];
1173    switch (si->type) {
1174    case llvm::MachO::S_NON_LAZY_SYMBOL_POINTERS:
1175      for (const AtomInfo &info : si->atomsAndOffsets) {
1176        bool foundTarget = false;
1177        for (const Reference *ref : *info.atom) {
1178          const Atom *target = ref->target();
1179          if (target) {
1180            if (isa<const SharedLibraryAtom>(target)) {
1181              uint32_t index = _atomToSymbolIndex[target];
1182              normSect.indirectSymbols.push_back(index);
1183              foundTarget = true;
1184            } else {
1185              normSect.indirectSymbols.push_back(
1186                                            llvm::MachO::INDIRECT_SYMBOL_LOCAL);
1187            }
1188          }
1189        }
1190        if (!foundTarget) {
1191          normSect.indirectSymbols.push_back(
1192                                             llvm::MachO::INDIRECT_SYMBOL_ABS);
1193        }
1194      }
1195      break;
1196    case llvm::MachO::S_LAZY_SYMBOL_POINTERS:
1197      for (const AtomInfo &info : si->atomsAndOffsets) {
1198        const Atom *target = targetOfLazyPointer(info.atom);
1199        if (target) {
1200          uint32_t index = _atomToSymbolIndex[target];
1201          normSect.indirectSymbols.push_back(index);
1202        }
1203      }
1204      break;
1205    case llvm::MachO::S_SYMBOL_STUBS:
1206      for (const AtomInfo &info : si->atomsAndOffsets) {
1207        const Atom *target = targetOfStub(info.atom);
1208        if (target) {
1209          uint32_t index = _atomToSymbolIndex[target];
1210          normSect.indirectSymbols.push_back(index);
1211        }
1212      }
1213      break;
1214    default:
1215      break;
1216    }
1217  }
1218}
1219
1220void Util::addDependentDylibs(const lld::File &atomFile,
1221                              NormalizedFile &nFile) {
1222  // Scan all imported symbols and build up list of dylibs they are from.
1223  int ordinal = 1;
1224  for (const auto *dylib : _ctx.allDylibs()) {
1225    DylibPathToInfo::iterator pos = _dylibInfo.find(dylib->installName());
1226    if (pos == _dylibInfo.end()) {
1227      DylibInfo info;
1228      bool flatNamespaceAtom = dylib == _ctx.flatNamespaceFile();
1229
1230      // If we're in -flat_namespace mode (or this atom came from the flat
1231      // namespace file under -undefined dynamic_lookup) then use the flat
1232      // lookup ordinal.
1233      if (flatNamespaceAtom || _ctx.useFlatNamespace())
1234        info.ordinal = BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
1235      else
1236        info.ordinal = ordinal++;
1237      info.hasWeak = false;
1238      info.hasNonWeak = !info.hasWeak;
1239      _dylibInfo[dylib->installName()] = info;
1240
1241      // Unless this was a flat_namespace atom, record the source dylib.
1242      if (!flatNamespaceAtom) {
1243        DependentDylib depInfo;
1244        depInfo.path = dylib->installName();
1245        depInfo.kind = llvm::MachO::LC_LOAD_DYLIB;
1246        depInfo.currentVersion = _ctx.dylibCurrentVersion(dylib->path());
1247        depInfo.compatVersion = _ctx.dylibCompatVersion(dylib->path());
1248        nFile.dependentDylibs.push_back(depInfo);
1249      }
1250    } else {
1251      pos->second.hasWeak = false;
1252      pos->second.hasNonWeak = !pos->second.hasWeak;
1253    }
1254  }
1255  // Automatically weak link dylib in which all symbols are weak (canBeNull).
1256  for (DependentDylib &dep : nFile.dependentDylibs) {
1257    DylibInfo &info = _dylibInfo[dep.path];
1258    if (info.hasWeak && !info.hasNonWeak)
1259      dep.kind = llvm::MachO::LC_LOAD_WEAK_DYLIB;
1260    else if (_ctx.isUpwardDylib(dep.path))
1261      dep.kind = llvm::MachO::LC_LOAD_UPWARD_DYLIB;
1262  }
1263}
1264
1265int Util::dylibOrdinal(const SharedLibraryAtom *sa) {
1266  return _dylibInfo[sa->loadName()].ordinal;
1267}
1268
1269void Util::segIndexForSection(const SectionInfo *sect, uint8_t &segmentIndex,
1270                                                  uint64_t &segmentStartAddr) {
1271  segmentIndex = 0;
1272  for (const SegmentInfo *seg : _segmentInfos) {
1273    if ((seg->address <= sect->address)
1274      && (seg->address+seg->size >= sect->address+sect->size)) {
1275      segmentStartAddr = seg->address;
1276      return;
1277    }
1278    ++segmentIndex;
1279  }
1280  llvm_unreachable("section not in any segment");
1281}
1282
1283uint32_t Util::sectionIndexForAtom(const Atom *atom) {
1284  uint64_t address = _atomToAddress[atom];
1285  for (const SectionInfo *si : _sectionInfos) {
1286    if ((si->address <= address) && (address < si->address+si->size))
1287      return si->finalSectionIndex;
1288  }
1289  llvm_unreachable("atom not in any section");
1290}
1291
1292void Util::addSectionRelocs(const lld::File &, NormalizedFile &file) {
1293  if (_ctx.outputMachOType() != llvm::MachO::MH_OBJECT)
1294    return;
1295
1296  // Utility function for ArchHandler to find symbol index for an atom.
1297  auto symIndexForAtom = [&] (const Atom &atom) -> uint32_t {
1298    auto pos = _atomToSymbolIndex.find(&atom);
1299    assert(pos != _atomToSymbolIndex.end());
1300    return pos->second;
1301  };
1302
1303  // Utility function for ArchHandler to find section index for an atom.
1304  auto sectIndexForAtom = [&] (const Atom &atom) -> uint32_t {
1305    return sectionIndexForAtom(&atom);
1306  };
1307
1308  // Utility function for ArchHandler to find address of atom in output file.
1309  auto addressForAtom = [&] (const Atom &atom) -> uint64_t {
1310    auto pos = _atomToAddress.find(&atom);
1311    assert(pos != _atomToAddress.end());
1312    return pos->second;
1313  };
1314
1315  for (SectionInfo *si : _sectionInfos) {
1316    Section &normSect = file.sections[si->normalizedSectionIndex];
1317    for (const AtomInfo &info : si->atomsAndOffsets) {
1318      const DefinedAtom *atom = info.atom;
1319      for (const Reference *ref : *atom) {
1320        // Skip emitting relocs for sections which are always able to be
1321        // implicitly regenerated and where the relocation targets an address
1322        // which is defined.
1323        if (si->relocsToDefinedCanBeImplicit && isa<DefinedAtom>(ref->target()))
1324          continue;
1325        _archHandler.appendSectionRelocations(*atom, info.offsetInSection, *ref,
1326                                              symIndexForAtom,
1327                                              sectIndexForAtom,
1328                                              addressForAtom,
1329                                              normSect.relocations);
1330      }
1331    }
1332  }
1333}
1334
1335void Util::addFunctionStarts(const lld::File &, NormalizedFile &file) {
1336  if (!_ctx.generateFunctionStartsLoadCommand())
1337    return;
1338  file.functionStarts.reserve(8192);
1339  // Delta compress function starts, starting with the mach header symbol.
1340  const uint64_t badAddress = ~0ULL;
1341  uint64_t addr = badAddress;
1342  for (SectionInfo *si : _sectionInfos) {
1343    for (const AtomInfo &info : si->atomsAndOffsets) {
1344      auto type = info.atom->contentType();
1345      if (type == DefinedAtom::typeMachHeader) {
1346        addr = _atomToAddress[info.atom];
1347        continue;
1348      }
1349      if (type != DefinedAtom::typeCode)
1350        continue;
1351      assert(addr != badAddress && "Missing mach header symbol");
1352      // Skip atoms which have 0 size.  This is so that LC_FUNCTION_STARTS
1353      // can't spill in to the next section.
1354      if (!info.atom->size())
1355        continue;
1356      uint64_t nextAddr = _atomToAddress[info.atom];
1357      if (_archHandler.isThumbFunction(*info.atom))
1358        nextAddr |= 1;
1359      uint64_t delta = nextAddr - addr;
1360      if (delta) {
1361        ByteBuffer buffer;
1362        buffer.append_uleb128(delta);
1363        file.functionStarts.insert(file.functionStarts.end(), buffer.bytes(),
1364                                   buffer.bytes() + buffer.size());
1365      }
1366      addr = nextAddr;
1367    }
1368  }
1369
1370  // Null terminate, and pad to pointer size for this arch.
1371  file.functionStarts.push_back(0);
1372
1373  auto size = file.functionStarts.size();
1374  for (unsigned i = size, e = llvm::alignTo(size, _ctx.is64Bit() ? 8 : 4);
1375       i != e; ++i)
1376    file.functionStarts.push_back(0);
1377}
1378
1379void Util::buildDataInCodeArray(const lld::File &, NormalizedFile &file) {
1380  if (!_ctx.generateDataInCodeLoadCommand())
1381    return;
1382  for (SectionInfo *si : _sectionInfos) {
1383    for (const AtomInfo &info : si->atomsAndOffsets) {
1384      // Atoms that contain data-in-code have "transition" references
1385      // which mark a point where the embedded data starts of ends.
1386      // This needs to be converted to the mach-o format which is an array
1387      // of data-in-code ranges.
1388      uint32_t startOffset = 0;
1389      DataRegionType mode = DataRegionType(0);
1390      for (const Reference *ref : *info.atom) {
1391        if (ref->kindNamespace() != Reference::KindNamespace::mach_o)
1392          continue;
1393        if (_archHandler.isDataInCodeTransition(ref->kindValue())) {
1394          DataRegionType nextMode = (DataRegionType)ref->addend();
1395          if (mode != nextMode) {
1396            if (mode != 0) {
1397              // Found end data range, so make range entry.
1398              DataInCode entry;
1399              entry.offset = si->address + info.offsetInSection + startOffset;
1400              entry.length = ref->offsetInAtom() - startOffset;
1401              entry.kind   = mode;
1402              file.dataInCode.push_back(entry);
1403            }
1404          }
1405          mode = nextMode;
1406          startOffset = ref->offsetInAtom();
1407        }
1408      }
1409      if (mode != 0) {
1410        // Function ends with data (no end transition).
1411        DataInCode entry;
1412        entry.offset = si->address + info.offsetInSection + startOffset;
1413        entry.length = info.atom->size() - startOffset;
1414        entry.kind   = mode;
1415        file.dataInCode.push_back(entry);
1416      }
1417    }
1418  }
1419}
1420
1421void Util::addRebaseAndBindingInfo(const lld::File &atomFile,
1422                                                        NormalizedFile &nFile) {
1423  if (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT)
1424    return;
1425
1426  uint8_t segmentIndex;
1427  uint64_t segmentStartAddr;
1428  uint32_t offsetInBindInfo = 0;
1429
1430  for (SectionInfo *sect : _sectionInfos) {
1431    segIndexForSection(sect, segmentIndex, segmentStartAddr);
1432    for (const AtomInfo &info : sect->atomsAndOffsets) {
1433      const DefinedAtom *atom = info.atom;
1434      for (const Reference *ref : *atom) {
1435        uint64_t segmentOffset = _atomToAddress[atom] + ref->offsetInAtom()
1436                                - segmentStartAddr;
1437        const Atom* targ = ref->target();
1438        if (_archHandler.isPointer(*ref)) {
1439          // A pointer to a DefinedAtom requires rebasing.
1440          if (isa<DefinedAtom>(targ)) {
1441            RebaseLocation rebase;
1442            rebase.segIndex = segmentIndex;
1443            rebase.segOffset = segmentOffset;
1444            rebase.kind = llvm::MachO::REBASE_TYPE_POINTER;
1445            nFile.rebasingInfo.push_back(rebase);
1446          }
1447          // A pointer to an SharedLibraryAtom requires binding.
1448          if (const SharedLibraryAtom *sa = dyn_cast<SharedLibraryAtom>(targ)) {
1449            BindLocation bind;
1450            bind.segIndex = segmentIndex;
1451            bind.segOffset = segmentOffset;
1452            bind.kind = llvm::MachO::BIND_TYPE_POINTER;
1453            bind.canBeNull = sa->canBeNullAtRuntime();
1454            bind.ordinal = dylibOrdinal(sa);
1455            bind.symbolName = targ->name();
1456            bind.addend = ref->addend();
1457            nFile.bindingInfo.push_back(bind);
1458          }
1459        }
1460        else if (_archHandler.isLazyPointer(*ref)) {
1461          BindLocation bind;
1462          if (const SharedLibraryAtom *sa = dyn_cast<SharedLibraryAtom>(targ)) {
1463            bind.ordinal = dylibOrdinal(sa);
1464          } else {
1465            bind.ordinal = llvm::MachO::BIND_SPECIAL_DYLIB_SELF;
1466          }
1467          bind.segIndex = segmentIndex;
1468          bind.segOffset = segmentOffset;
1469          bind.kind = llvm::MachO::BIND_TYPE_POINTER;
1470          bind.canBeNull = false; //sa->canBeNullAtRuntime();
1471          bind.symbolName = targ->name();
1472          bind.addend = ref->addend();
1473          nFile.lazyBindingInfo.push_back(bind);
1474
1475          // Now that we know the segmentOffset and the ordinal attribute,
1476          // we can fix the helper's code
1477
1478          fixLazyReferenceImm(atom, offsetInBindInfo, nFile);
1479
1480          // 5 bytes for opcodes + variable sizes (target name + \0 and offset
1481          // encode's size)
1482          offsetInBindInfo +=
1483              6 + targ->name().size() + llvm::getULEB128Size(bind.segOffset);
1484          if (bind.ordinal > BIND_IMMEDIATE_MASK)
1485            offsetInBindInfo += llvm::getULEB128Size(bind.ordinal);
1486        }
1487      }
1488    }
1489  }
1490}
1491
1492void Util::fixLazyReferenceImm(const DefinedAtom *atom, uint32_t offset,
1493                               NormalizedFile &file) {
1494  for (const Reference *ref : *atom) {
1495    const DefinedAtom *da = dyn_cast<DefinedAtom>(ref->target());
1496    if (da == nullptr)
1497      return;
1498
1499    const Reference *helperRef = nullptr;
1500    for (const Reference *hr : *da) {
1501      if (hr->kindValue() == _archHandler.lazyImmediateLocationKind()) {
1502        helperRef = hr;
1503        break;
1504      }
1505    }
1506    if (helperRef == nullptr)
1507      continue;
1508
1509    // TODO: maybe get the fixed atom content from _archHandler ?
1510    for (SectionInfo *sectInfo : _sectionInfos) {
1511      for (const AtomInfo &atomInfo : sectInfo->atomsAndOffsets) {
1512        if (atomInfo.atom == helperRef->target()) {
1513          auto sectionContent =
1514              file.sections[sectInfo->normalizedSectionIndex].content;
1515          uint8_t *rawb =
1516              file.ownedAllocations.Allocate<uint8_t>(sectionContent.size());
1517          llvm::MutableArrayRef<uint8_t> newContent{rawb,
1518                                                    sectionContent.size()};
1519          std::copy(sectionContent.begin(), sectionContent.end(),
1520                    newContent.begin());
1521          llvm::support::ulittle32_t *loc =
1522              reinterpret_cast<llvm::support::ulittle32_t *>(
1523                  &newContent[atomInfo.offsetInSection +
1524                              helperRef->offsetInAtom()]);
1525          *loc = offset;
1526          file.sections[sectInfo->normalizedSectionIndex].content = newContent;
1527        }
1528      }
1529    }
1530  }
1531}
1532
1533void Util::addExportInfo(const lld::File &atomFile, NormalizedFile &nFile) {
1534  if (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT)
1535    return;
1536
1537  for (SectionInfo *sect : _sectionInfos) {
1538    for (const AtomInfo &info : sect->atomsAndOffsets) {
1539      const DefinedAtom *atom = info.atom;
1540      if (atom->scope() != Atom::scopeGlobal)
1541        continue;
1542      if (_ctx.exportRestrictMode()) {
1543        if (!_ctx.exportSymbolNamed(atom->name()))
1544          continue;
1545      }
1546      Export exprt;
1547      exprt.name = atom->name();
1548      exprt.offset = _atomToAddress[atom] - _ctx.baseAddress();
1549      exprt.kind = EXPORT_SYMBOL_FLAGS_KIND_REGULAR;
1550      if (atom->merge() == DefinedAtom::mergeAsWeak)
1551        exprt.flags = EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
1552      else
1553        exprt.flags = 0;
1554      exprt.otherOffset = 0;
1555      exprt.otherName = StringRef();
1556      nFile.exportInfo.push_back(exprt);
1557    }
1558  }
1559}
1560
1561uint32_t Util::fileFlags() {
1562  // FIXME: these need to determined at runtime.
1563  if (_ctx.outputMachOType() == MH_OBJECT) {
1564    return _subsectionsViaSymbols ? MH_SUBSECTIONS_VIA_SYMBOLS : 0;
1565  } else {
1566    uint32_t flags = MH_DYLDLINK;
1567    if (!_ctx.useFlatNamespace())
1568        flags |= MH_TWOLEVEL | MH_NOUNDEFS;
1569    if ((_ctx.outputMachOType() == MH_EXECUTE) && _ctx.PIE())
1570      flags |= MH_PIE;
1571    if (_hasTLVDescriptors)
1572      flags |= (MH_PIE | MH_HAS_TLV_DESCRIPTORS);
1573    return flags;
1574  }
1575}
1576
1577} // end anonymous namespace
1578
1579namespace lld {
1580namespace mach_o {
1581namespace normalized {
1582
1583/// Convert a set of Atoms into a normalized mach-o file.
1584llvm::Expected<std::unique_ptr<NormalizedFile>>
1585normalizedFromAtoms(const lld::File &atomFile,
1586                                           const MachOLinkingContext &context) {
1587  // The util object buffers info until the normalized file can be made.
1588  Util util(context);
1589  util.processDefinedAtoms(atomFile);
1590  util.organizeSections();
1591
1592  std::unique_ptr<NormalizedFile> f(new NormalizedFile());
1593  NormalizedFile &normFile = *f.get();
1594  normFile.arch = context.arch();
1595  normFile.fileType = context.outputMachOType();
1596  normFile.flags = util.fileFlags();
1597  normFile.stackSize = context.stackSize();
1598  normFile.installName = context.installName();
1599  normFile.currentVersion = context.currentVersion();
1600  normFile.compatVersion = context.compatibilityVersion();
1601  normFile.os = context.os();
1602
1603  // If we are emitting an object file, then the min version is the maximum
1604  // of the min's of all the source files and the cmdline.
1605  if (normFile.fileType == llvm::MachO::MH_OBJECT)
1606    normFile.minOSverson = std::max(context.osMinVersion(), util.minVersion());
1607  else
1608    normFile.minOSverson = context.osMinVersion();
1609
1610  normFile.minOSVersionKind = util.minVersionCommandType();
1611
1612  normFile.sdkVersion = context.sdkVersion();
1613  normFile.sourceVersion = context.sourceVersion();
1614
1615  if (context.generateVersionLoadCommand() &&
1616      context.os() != MachOLinkingContext::OS::unknown)
1617    normFile.hasMinVersionLoadCommand = true;
1618  else if (normFile.fileType == llvm::MachO::MH_OBJECT &&
1619           util.allSourceFilesHaveMinVersions() &&
1620           ((normFile.os != MachOLinkingContext::OS::unknown) ||
1621            util.minVersionCommandType())) {
1622    // If we emit an object file, then it should contain a min version load
1623    // command if all of the source files also contained min version commands.
1624    // Also, we either need to have a platform, or found a platform from the
1625    // source object files.
1626    normFile.hasMinVersionLoadCommand = true;
1627  }
1628  normFile.generateDataInCodeLoadCommand =
1629    context.generateDataInCodeLoadCommand();
1630  normFile.pageSize = context.pageSize();
1631  normFile.rpaths = context.rpaths();
1632  util.addDependentDylibs(atomFile, normFile);
1633  util.copySegmentInfo(normFile);
1634  util.copySectionInfo(normFile);
1635  util.assignAddressesToSections(normFile);
1636  util.buildAtomToAddressMap();
1637  if (auto err = util.synthesizeDebugNotes(normFile))
1638    return std::move(err);
1639  util.updateSectionInfo(normFile);
1640  util.copySectionContent(normFile);
1641  if (auto ec = util.addSymbols(atomFile, normFile)) {
1642    return std::move(ec);
1643  }
1644  util.addIndirectSymbols(atomFile, normFile);
1645  util.addRebaseAndBindingInfo(atomFile, normFile);
1646  util.addExportInfo(atomFile, normFile);
1647  util.addSectionRelocs(atomFile, normFile);
1648  util.addFunctionStarts(atomFile, normFile);
1649  util.buildDataInCodeArray(atomFile, normFile);
1650  util.copyEntryPointAddress(normFile);
1651
1652  return std::move(f);
1653}
1654
1655} // namespace normalized
1656} // namespace mach_o
1657} // namespace lld
1658