MachONormalizedFileFromAtoms.cpp revision 314564
1//===- lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp ------------===//
2//
3//                             The LLVM Linker
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10///
11/// \file Converts from in-memory Atoms to in-memory normalized mach-o.
12///
13///                  +------------+
14///                  | normalized |
15///                  +------------+
16///                        ^
17///                        |
18///                        |
19///                    +-------+
20///                    | Atoms |
21///                    +-------+
22
#include "MachONormalizedFile.h"
#include "ArchHandler.h"
#include "DebugInfo.h"
#include "MachONormalizedFileBinaryUtils.h"
#include "lld/Core/Error.h"
#include "lld/Core/LLVM.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MachO.h"
#include <algorithm>
#include <cstdint>
#include <map>
#include <system_error>
#include <unordered_set>
39
40using llvm::StringRef;
41using llvm::isa;
42using namespace llvm::MachO;
43using namespace lld::mach_o::normalized;
44using namespace lld;
45
46namespace {
47
48struct AtomInfo {
49  const DefinedAtom  *atom;
50  uint64_t            offsetInSection;
51};
52
53struct SectionInfo {
54  SectionInfo(StringRef seg, StringRef sect, SectionType type,
55              const MachOLinkingContext &ctxt, uint32_t attr,
56              bool relocsToDefinedCanBeImplicit);
57
58  StringRef                 segmentName;
59  StringRef                 sectionName;
60  SectionType               type;
61  uint32_t                  attributes;
62  uint64_t                  address;
63  uint64_t                  size;
64  uint16_t                  alignment;
65
66  /// If this is set, the any relocs in this section which point to defined
67  /// addresses can be implicitly generated.  This is the case for the
68  /// __eh_frame section where references to the function can be implicit if the
69  /// function is defined.
70  bool                      relocsToDefinedCanBeImplicit;
71
72
73  std::vector<AtomInfo>     atomsAndOffsets;
74  uint32_t                  normalizedSectionIndex;
75  uint32_t                  finalSectionIndex;
76};
77
78SectionInfo::SectionInfo(StringRef sg, StringRef sct, SectionType t,
79                         const MachOLinkingContext &ctxt, uint32_t attrs,
80                         bool relocsToDefinedCanBeImplicit)
81 : segmentName(sg), sectionName(sct), type(t), attributes(attrs),
82                 address(0), size(0), alignment(1),
83                 relocsToDefinedCanBeImplicit(relocsToDefinedCanBeImplicit),
84                 normalizedSectionIndex(0), finalSectionIndex(0) {
85  uint16_t align = 1;
86  if (ctxt.sectionAligned(segmentName, sectionName, align)) {
87    alignment = align;
88  }
89}
90
91struct SegmentInfo {
92  SegmentInfo(StringRef name);
93
94  StringRef                  name;
95  uint64_t                   address;
96  uint64_t                   size;
97  uint32_t                   init_access;
98  uint32_t                   max_access;
99  std::vector<SectionInfo*>  sections;
100  uint32_t                   normalizedSegmentIndex;
101};
102
103SegmentInfo::SegmentInfo(StringRef n)
104 : name(n), address(0), size(0), init_access(0), max_access(0),
105   normalizedSegmentIndex(0) {
106}
107
108class Util {
109public:
110  Util(const MachOLinkingContext &ctxt)
111      : _ctx(ctxt), _archHandler(ctxt.archHandler()), _entryAtom(nullptr),
112        _hasTLVDescriptors(false), _subsectionsViaSymbols(true) {}
113  ~Util();
114
115  void      processDefinedAtoms(const lld::File &atomFile);
116  void      processAtomAttributes(const DefinedAtom *atom);
117  void      assignAtomToSection(const DefinedAtom *atom);
118  void      organizeSections();
119  void      assignAddressesToSections(const NormalizedFile &file);
120  uint32_t  fileFlags();
121  void      copySegmentInfo(NormalizedFile &file);
122  void      copySectionInfo(NormalizedFile &file);
123  void      updateSectionInfo(NormalizedFile &file);
124  void      buildAtomToAddressMap();
125  llvm::Error synthesizeDebugNotes(NormalizedFile &file);
126  llvm::Error addSymbols(const lld::File &atomFile, NormalizedFile &file);
127  void      addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file);
128  void      addRebaseAndBindingInfo(const lld::File &, NormalizedFile &file);
129  void      addExportInfo(const lld::File &, NormalizedFile &file);
130  void      addSectionRelocs(const lld::File &, NormalizedFile &file);
131  void      addFunctionStarts(const lld::File &, NormalizedFile &file);
132  void      buildDataInCodeArray(const lld::File &, NormalizedFile &file);
133  void      addDependentDylibs(const lld::File &, NormalizedFile &file);
134  void      copyEntryPointAddress(NormalizedFile &file);
135  void      copySectionContent(NormalizedFile &file);
136
137  bool allSourceFilesHaveMinVersions() const {
138    return _allSourceFilesHaveMinVersions;
139  }
140
141  uint32_t minVersion() const {
142    return _minVersion;
143  }
144
145  LoadCommandType minVersionCommandType() const {
146    return _minVersionCommandType;
147  }
148
149private:
150  typedef std::map<DefinedAtom::ContentType, SectionInfo*> TypeToSection;
151  typedef llvm::DenseMap<const Atom*, uint64_t> AtomToAddress;
152
153  struct DylibInfo { int ordinal; bool hasWeak; bool hasNonWeak; };
154  typedef llvm::StringMap<DylibInfo> DylibPathToInfo;
155
156  SectionInfo *sectionForAtom(const DefinedAtom*);
157  SectionInfo *getRelocatableSection(DefinedAtom::ContentType type);
158  SectionInfo *getFinalSection(DefinedAtom::ContentType type);
159  void         appendAtom(SectionInfo *sect, const DefinedAtom *atom);
160  SegmentInfo *segmentForName(StringRef segName);
161  void         layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr);
162  void         layoutSectionsInTextSegment(size_t, SegmentInfo *, uint64_t &);
163  void         copySectionContent(SectionInfo *si, ContentBytes &content);
164  uint16_t     descBits(const DefinedAtom* atom);
165  int          dylibOrdinal(const SharedLibraryAtom *sa);
166  void         segIndexForSection(const SectionInfo *sect,
167                             uint8_t &segmentIndex, uint64_t &segmentStartAddr);
168  const Atom  *targetOfLazyPointer(const DefinedAtom *lpAtom);
169  const Atom  *targetOfStub(const DefinedAtom *stubAtom);
170  llvm::Error getSymbolTableRegion(const DefinedAtom* atom,
171                                   bool &inGlobalsRegion,
172                                   SymbolScope &symbolScope);
173  void         appendSection(SectionInfo *si, NormalizedFile &file);
174  uint32_t     sectionIndexForAtom(const Atom *atom);
175
176  typedef llvm::DenseMap<const Atom*, uint32_t> AtomToIndex;
177  struct AtomAndIndex { const Atom *atom; uint32_t index; SymbolScope scope; };
178  struct AtomSorter {
179    bool operator()(const AtomAndIndex &left, const AtomAndIndex &right);
180  };
181  struct SegmentSorter {
182    bool operator()(const SegmentInfo *left, const SegmentInfo *right);
183    static unsigned weight(const SegmentInfo *);
184  };
185  struct TextSectionSorter {
186    bool operator()(const SectionInfo *left, const SectionInfo *right);
187    static unsigned weight(const SectionInfo *);
188  };
189
190  const MachOLinkingContext &_ctx;
191  mach_o::ArchHandler          &_archHandler;
192  llvm::BumpPtrAllocator        _allocator;
193  std::vector<SectionInfo*>     _sectionInfos;
194  std::vector<SegmentInfo*>     _segmentInfos;
195  TypeToSection                 _sectionMap;
196  std::vector<SectionInfo*>     _customSections;
197  AtomToAddress                 _atomToAddress;
198  DylibPathToInfo               _dylibInfo;
199  const DefinedAtom            *_entryAtom;
200  AtomToIndex                   _atomToSymbolIndex;
201  std::vector<const Atom *>     _machHeaderAliasAtoms;
202  bool                          _hasTLVDescriptors;
203  bool                          _subsectionsViaSymbols;
204  bool                          _allSourceFilesHaveMinVersions = true;
205  LoadCommandType               _minVersionCommandType = (LoadCommandType)0;
206  uint32_t                      _minVersion = 0;
207  std::vector<lld::mach_o::Stab> _stabs;
208};
209
210Util::~Util() {
211  // The SectionInfo structs are BumpPtr allocated, but atomsAndOffsets needs
212  // to be deleted.
213  for (SectionInfo *si : _sectionInfos) {
214    // clear() destroys vector elements, but does not deallocate.
215    // Instead use swap() to deallocate vector buffer.
216    std::vector<AtomInfo> empty;
217    si->atomsAndOffsets.swap(empty);
218  }
219  // The SegmentInfo structs are BumpPtr allocated, but sections needs
220  // to be deleted.
221  for (SegmentInfo *sgi : _segmentInfos) {
222    std::vector<SectionInfo*> empty2;
223    sgi->sections.swap(empty2);
224  }
225}
226
227SectionInfo *Util::getRelocatableSection(DefinedAtom::ContentType type) {
228  StringRef segmentName;
229  StringRef sectionName;
230  SectionType sectionType;
231  SectionAttr sectionAttrs;
232  bool relocsToDefinedCanBeImplicit;
233
234  // Use same table used by when parsing .o files.
235  relocatableSectionInfoForContentType(type, segmentName, sectionName,
236                                       sectionType, sectionAttrs,
237                                       relocsToDefinedCanBeImplicit);
238  // If we already have a SectionInfo with this name, re-use it.
239  // This can happen if two ContentType map to the same mach-o section.
240  for (auto sect : _sectionMap) {
241    if (sect.second->sectionName.equals(sectionName) &&
242        sect.second->segmentName.equals(segmentName)) {
243      return sect.second;
244    }
245  }
246  // Otherwise allocate new SectionInfo object.
247  auto *sect = new (_allocator)
248      SectionInfo(segmentName, sectionName, sectionType, _ctx, sectionAttrs,
249                  relocsToDefinedCanBeImplicit);
250  _sectionInfos.push_back(sect);
251  _sectionMap[type] = sect;
252  return sect;
253}
254
255#define ENTRY(seg, sect, type, atomType) \
256  {seg, sect, type, DefinedAtom::atomType }
257
258struct MachOFinalSectionFromAtomType {
259  StringRef                 segmentName;
260  StringRef                 sectionName;
261  SectionType               sectionType;
262  DefinedAtom::ContentType  atomType;
263};
264
265const MachOFinalSectionFromAtomType sectsToAtomType[] = {
266  ENTRY("__TEXT", "__text",           S_REGULAR,          typeCode),
267  ENTRY("__TEXT", "__text",           S_REGULAR,          typeMachHeader),
268  ENTRY("__TEXT", "__cstring",        S_CSTRING_LITERALS, typeCString),
269  ENTRY("__TEXT", "__ustring",        S_REGULAR,          typeUTF16String),
270  ENTRY("__TEXT", "__const",          S_REGULAR,          typeConstant),
271  ENTRY("__TEXT", "__const",          S_4BYTE_LITERALS,   typeLiteral4),
272  ENTRY("__TEXT", "__const",          S_8BYTE_LITERALS,   typeLiteral8),
273  ENTRY("__TEXT", "__const",          S_16BYTE_LITERALS,  typeLiteral16),
274  ENTRY("__TEXT", "__stubs",          S_SYMBOL_STUBS,     typeStub),
275  ENTRY("__TEXT", "__stub_helper",    S_REGULAR,          typeStubHelper),
276  ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR,          typeLSDA),
277  ENTRY("__TEXT", "__eh_frame",       S_COALESCED,        typeCFI),
278  ENTRY("__TEXT", "__unwind_info",    S_REGULAR,          typeProcessedUnwindInfo),
279  ENTRY("__DATA", "__data",           S_REGULAR,          typeData),
280  ENTRY("__DATA", "__const",          S_REGULAR,          typeConstData),
281  ENTRY("__DATA", "__cfstring",       S_REGULAR,          typeCFString),
282  ENTRY("__DATA", "__la_symbol_ptr",  S_LAZY_SYMBOL_POINTERS,
283                                                          typeLazyPointer),
284  ENTRY("__DATA", "__mod_init_func",  S_MOD_INIT_FUNC_POINTERS,
285                                                          typeInitializerPtr),
286  ENTRY("__DATA", "__mod_term_func",  S_MOD_TERM_FUNC_POINTERS,
287                                                          typeTerminatorPtr),
288  ENTRY("__DATA", "__got",            S_NON_LAZY_SYMBOL_POINTERS,
289                                                          typeGOT),
290  ENTRY("__DATA", "__nl_symbol_ptr",  S_NON_LAZY_SYMBOL_POINTERS,
291                                                          typeNonLazyPointer),
292  ENTRY("__DATA", "__thread_vars",    S_THREAD_LOCAL_VARIABLES,
293                                                          typeThunkTLV),
294  ENTRY("__DATA", "__thread_data",    S_THREAD_LOCAL_REGULAR,
295                                                          typeTLVInitialData),
296  ENTRY("__DATA", "__thread_ptrs",    S_THREAD_LOCAL_VARIABLE_POINTERS,
297                                                          typeTLVInitializerPtr),
298  ENTRY("__DATA", "__thread_bss",     S_THREAD_LOCAL_ZEROFILL,
299                                                         typeTLVInitialZeroFill),
300  ENTRY("__DATA", "__bss",            S_ZEROFILL,         typeZeroFill),
301  ENTRY("__DATA", "__interposing",    S_INTERPOSING,      typeInterposingTuples),
302};
303#undef ENTRY
304
305SectionInfo *Util::getFinalSection(DefinedAtom::ContentType atomType) {
306  for (auto &p : sectsToAtomType) {
307    if (p.atomType != atomType)
308      continue;
309    SectionAttr sectionAttrs = 0;
310    switch (atomType) {
311    case DefinedAtom::typeMachHeader:
312    case DefinedAtom::typeCode:
313    case DefinedAtom::typeStub:
314    case DefinedAtom::typeStubHelper:
315      sectionAttrs = S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS;
316      break;
317    case DefinedAtom::typeThunkTLV:
318      _hasTLVDescriptors = true;
319      break;
320    default:
321      break;
322    }
323    // If we already have a SectionInfo with this name, re-use it.
324    // This can happen if two ContentType map to the same mach-o section.
325    for (auto sect : _sectionMap) {
326      if (sect.second->sectionName.equals(p.sectionName) &&
327          sect.second->segmentName.equals(p.segmentName)) {
328        return sect.second;
329      }
330    }
331    // Otherwise allocate new SectionInfo object.
332    auto *sect = new (_allocator) SectionInfo(
333        p.segmentName, p.sectionName, p.sectionType, _ctx, sectionAttrs,
334        /* relocsToDefinedCanBeImplicit */ false);
335    _sectionInfos.push_back(sect);
336    _sectionMap[atomType] = sect;
337    return sect;
338  }
339  llvm_unreachable("content type not yet supported");
340}
341
342SectionInfo *Util::sectionForAtom(const DefinedAtom *atom) {
343  if (atom->sectionChoice() == DefinedAtom::sectionBasedOnContent) {
344    // Section for this atom is derived from content type.
345    DefinedAtom::ContentType type = atom->contentType();
346    auto pos = _sectionMap.find(type);
347    if ( pos != _sectionMap.end() )
348      return pos->second;
349    bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT);
350    return rMode ? getRelocatableSection(type) : getFinalSection(type);
351  } else {
352    // This atom needs to be in a custom section.
353    StringRef customName = atom->customSectionName();
354    // Look to see if we have already allocated the needed custom section.
355    for(SectionInfo *sect : _customSections) {
356      const DefinedAtom *firstAtom = sect->atomsAndOffsets.front().atom;
357      if (firstAtom->customSectionName().equals(customName)) {
358        return sect;
359      }
360    }
361    // Not found, so need to create a new custom section.
362    size_t seperatorIndex = customName.find('/');
363    assert(seperatorIndex != StringRef::npos);
364    StringRef segName = customName.slice(0, seperatorIndex);
365    StringRef sectName = customName.drop_front(seperatorIndex + 1);
366    auto *sect =
367        new (_allocator) SectionInfo(segName, sectName, S_REGULAR, _ctx,
368                                     0, /* relocsToDefinedCanBeImplicit */ false);
369    _customSections.push_back(sect);
370    _sectionInfos.push_back(sect);
371    return sect;
372  }
373}
374
375void Util::appendAtom(SectionInfo *sect, const DefinedAtom *atom) {
376  // Figure out offset for atom in this section given alignment constraints.
377  uint64_t offset = sect->size;
378  DefinedAtom::Alignment atomAlign = atom->alignment();
379  uint64_t align = atomAlign.value;
380  uint64_t requiredModulus = atomAlign.modulus;
381  uint64_t currentModulus = (offset % align);
382  if ( currentModulus != requiredModulus ) {
383    if ( requiredModulus > currentModulus )
384      offset += requiredModulus-currentModulus;
385    else
386      offset += align+requiredModulus-currentModulus;
387  }
388  // Record max alignment of any atom in this section.
389  if (align > sect->alignment)
390    sect->alignment = atomAlign.value;
391  // Assign atom to this section with this offset.
392  AtomInfo ai = {atom, offset};
393  sect->atomsAndOffsets.push_back(ai);
394  // Update section size to include this atom.
395  sect->size = offset + atom->size();
396}
397
398void Util::processDefinedAtoms(const lld::File &atomFile) {
399  for (const DefinedAtom *atom : atomFile.defined()) {
400    processAtomAttributes(atom);
401    assignAtomToSection(atom);
402  }
403}
404
405void Util::processAtomAttributes(const DefinedAtom *atom) {
406  if (auto *machoFile = dyn_cast<mach_o::MachOFile>(&atom->file())) {
407    // If the file doesn't use subsections via symbols, then make sure we don't
408    // add that flag to the final output file if we have a relocatable file.
409    if (!machoFile->subsectionsViaSymbols())
410      _subsectionsViaSymbols = false;
411
412    // All the source files must have min versions for us to output an object
413    // file with a min version.
414    if (auto v = machoFile->minVersion())
415      _minVersion = std::max(_minVersion, v);
416    else
417      _allSourceFilesHaveMinVersions = false;
418
419    // If we don't have a platform load command, but one of the source files
420    // does, then take the one from the file.
421    if (!_minVersionCommandType)
422      if (auto v = machoFile->minVersionLoadCommandKind())
423        _minVersionCommandType = v;
424  }
425}
426
427void Util::assignAtomToSection(const DefinedAtom *atom) {
428  if (atom->contentType() == DefinedAtom::typeMachHeader) {
429    _machHeaderAliasAtoms.push_back(atom);
430    // Assign atom to this section with this offset.
431    AtomInfo ai = {atom, 0};
432    sectionForAtom(atom)->atomsAndOffsets.push_back(ai);
433  } else if (atom->contentType() == DefinedAtom::typeDSOHandle)
434    _machHeaderAliasAtoms.push_back(atom);
435  else
436    appendAtom(sectionForAtom(atom), atom);
437}
438
439SegmentInfo *Util::segmentForName(StringRef segName) {
440  for (SegmentInfo *si : _segmentInfos) {
441    if ( si->name.equals(segName) )
442      return si;
443  }
444  auto *info = new (_allocator) SegmentInfo(segName);
445
446  // Set the initial segment protection.
447  if (segName.equals("__TEXT"))
448    info->init_access = VM_PROT_READ | VM_PROT_EXECUTE;
449  else if (segName.equals("__PAGEZERO"))
450    info->init_access = 0;
451  else if (segName.equals("__LINKEDIT"))
452    info->init_access = VM_PROT_READ;
453  else {
454    // All others default to read-write
455    info->init_access = VM_PROT_READ | VM_PROT_WRITE;
456  }
457
458  // Set max segment protection
459  // Note, its overkill to use a switch statement here, but makes it so much
460  // easier to use switch coverage to catch new cases.
461  switch (_ctx.os()) {
462    case lld::MachOLinkingContext::OS::unknown:
463    case lld::MachOLinkingContext::OS::macOSX:
464    case lld::MachOLinkingContext::OS::iOS_simulator:
465      if (segName.equals("__PAGEZERO")) {
466        info->max_access = 0;
467        break;
468      }
469      // All others default to all
470      info->max_access = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
471      break;
472    case lld::MachOLinkingContext::OS::iOS:
473      // iPhoneOS always uses same protection for max and initial
474      info->max_access = info->init_access;
475      break;
476  }
477  _segmentInfos.push_back(info);
478  return info;
479}
480
481unsigned Util::SegmentSorter::weight(const SegmentInfo *seg) {
482 return llvm::StringSwitch<unsigned>(seg->name)
483    .Case("__PAGEZERO",  1)
484    .Case("__TEXT",      2)
485    .Case("__DATA",      3)
486    .Default(100);
487}
488
489bool Util::SegmentSorter::operator()(const SegmentInfo *left,
490                                  const SegmentInfo *right) {
491  return (weight(left) < weight(right));
492}
493
494unsigned Util::TextSectionSorter::weight(const SectionInfo *sect) {
495 return llvm::StringSwitch<unsigned>(sect->sectionName)
496    .Case("__text",         1)
497    .Case("__stubs",        2)
498    .Case("__stub_helper",  3)
499    .Case("__const",        4)
500    .Case("__cstring",      5)
501    .Case("__unwind_info",  98)
502    .Case("__eh_frame",     99)
503    .Default(10);
504}
505
506bool Util::TextSectionSorter::operator()(const SectionInfo *left,
507                                         const SectionInfo *right) {
508  return (weight(left) < weight(right));
509}
510
511void Util::organizeSections() {
512  // NOTE!: Keep this in sync with assignAddressesToSections.
513  switch (_ctx.outputMachOType()) {
514    case llvm::MachO::MH_EXECUTE:
515      // Main executables, need a zero-page segment
516      segmentForName("__PAGEZERO");
517      // Fall into next case.
518    case llvm::MachO::MH_DYLIB:
519    case llvm::MachO::MH_BUNDLE:
520      // All dynamic code needs TEXT segment to hold the load commands.
521      segmentForName("__TEXT");
522      break;
523    default:
524      break;
525  }
526  segmentForName("__LINKEDIT");
527
528  // Group sections into segments.
529  for (SectionInfo *si : _sectionInfos) {
530    SegmentInfo *seg = segmentForName(si->segmentName);
531    seg->sections.push_back(si);
532  }
533  // Sort segments.
534  std::sort(_segmentInfos.begin(), _segmentInfos.end(), SegmentSorter());
535
536  // Sort sections within segments.
537  for (SegmentInfo *seg : _segmentInfos) {
538    if (seg->name.equals("__TEXT")) {
539      std::sort(seg->sections.begin(), seg->sections.end(),
540                TextSectionSorter());
541    }
542  }
543
544  // Record final section indexes.
545  uint32_t segmentIndex = 0;
546  uint32_t sectionIndex = 1;
547  for (SegmentInfo *seg : _segmentInfos) {
548    seg->normalizedSegmentIndex = segmentIndex++;
549    for (SectionInfo *sect : seg->sections)
550      sect->finalSectionIndex = sectionIndex++;
551  }
552}
553
554void Util::layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr) {
555  seg->address = addr;
556  for (SectionInfo *sect : seg->sections) {
557    sect->address = llvm::alignTo(addr, sect->alignment);
558    addr = sect->address + sect->size;
559  }
560  seg->size = llvm::alignTo(addr - seg->address, _ctx.pageSize());
561}
562
563// __TEXT segment lays out backwards so padding is at front after load commands.
564void Util::layoutSectionsInTextSegment(size_t hlcSize, SegmentInfo *seg,
565                                                               uint64_t &addr) {
566  seg->address = addr;
567  // Walks sections starting at end to calculate padding for start.
568  int64_t taddr = 0;
569  for (auto it = seg->sections.rbegin(); it != seg->sections.rend(); ++it) {
570    SectionInfo *sect = *it;
571    taddr -= sect->size;
572    taddr = taddr & (0 - sect->alignment);
573  }
574  int64_t padding = taddr - hlcSize;
575  while (padding < 0)
576    padding += _ctx.pageSize();
577  // Start assigning section address starting at padded offset.
578  addr += (padding + hlcSize);
579  for (SectionInfo *sect : seg->sections) {
580    sect->address = llvm::alignTo(addr, sect->alignment);
581    addr = sect->address + sect->size;
582  }
583  seg->size = llvm::alignTo(addr - seg->address, _ctx.pageSize());
584}
585
586void Util::assignAddressesToSections(const NormalizedFile &file) {
587  // NOTE!: Keep this in sync with organizeSections.
588  size_t hlcSize = headerAndLoadCommandsSize(file);
589  uint64_t address = 0;
590  for (SegmentInfo *seg : _segmentInfos) {
591    if (seg->name.equals("__PAGEZERO")) {
592      seg->size = _ctx.pageZeroSize();
593      address += seg->size;
594    }
595    else if (seg->name.equals("__TEXT")) {
596      // _ctx.baseAddress()  == 0 implies it was either unspecified or
597      // pageZeroSize is also 0. In either case resetting address is safe.
598      address = _ctx.baseAddress() ? _ctx.baseAddress() : address;
599      layoutSectionsInTextSegment(hlcSize, seg, address);
600    } else
601      layoutSectionsInSegment(seg, address);
602
603    address = llvm::alignTo(address, _ctx.pageSize());
604  }
605  DEBUG_WITH_TYPE("WriterMachO-norm",
606    llvm::dbgs() << "assignAddressesToSections()\n";
607    for (SegmentInfo *sgi : _segmentInfos) {
608      llvm::dbgs()  << "   address=" << llvm::format("0x%08llX", sgi->address)
609                    << ", size="  << llvm::format("0x%08llX", sgi->size)
610                    << ", segment-name='" << sgi->name
611                    << "'\n";
612      for (SectionInfo *si : sgi->sections) {
613        llvm::dbgs()<< "      addr="  << llvm::format("0x%08llX", si->address)
614                    << ", size="  << llvm::format("0x%08llX", si->size)
615                    << ", section-name='" << si->sectionName
616                    << "\n";
617      }
618    }
619  );
620}
621
622void Util::copySegmentInfo(NormalizedFile &file) {
623  for (SegmentInfo *sgi : _segmentInfos) {
624    Segment seg;
625    seg.name    = sgi->name;
626    seg.address = sgi->address;
627    seg.size    = sgi->size;
628    seg.init_access  = sgi->init_access;
629    seg.max_access  = sgi->max_access;
630    file.segments.push_back(seg);
631  }
632}
633
634void Util::appendSection(SectionInfo *si, NormalizedFile &file) {
635   // Add new empty section to end of file.sections.
636  Section temp;
637  file.sections.push_back(std::move(temp));
638  Section* normSect = &file.sections.back();
639  // Copy fields to normalized section.
640  normSect->segmentName   = si->segmentName;
641  normSect->sectionName   = si->sectionName;
642  normSect->type          = si->type;
643  normSect->attributes    = si->attributes;
644  normSect->address       = si->address;
645  normSect->alignment     = si->alignment;
646  // Record where normalized section is.
647  si->normalizedSectionIndex = file.sections.size()-1;
648}
649
650void Util::copySectionContent(NormalizedFile &file) {
651  const bool r = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT);
652
653  // Utility function for ArchHandler to find address of atom in output file.
654  auto addrForAtom = [&] (const Atom &atom) -> uint64_t {
655    auto pos = _atomToAddress.find(&atom);
656    assert(pos != _atomToAddress.end());
657    return pos->second;
658  };
659
660  auto sectionAddrForAtom = [&] (const Atom &atom) -> uint64_t {
661    for (const SectionInfo *sectInfo : _sectionInfos)
662      for (const AtomInfo &atomInfo : sectInfo->atomsAndOffsets)
663        if (atomInfo.atom == &atom)
664          return sectInfo->address;
665    llvm_unreachable("atom not assigned to section");
666  };
667
668  for (SectionInfo *si : _sectionInfos) {
669    Section *normSect = &file.sections[si->normalizedSectionIndex];
670    if (isZeroFillSection(si->type)) {
671      const uint8_t *empty = nullptr;
672      normSect->content = llvm::makeArrayRef(empty, si->size);
673      continue;
674    }
675    // Copy content from atoms to content buffer for section.
676    llvm::MutableArrayRef<uint8_t> sectionContent;
677    if (si->size) {
678      uint8_t *sectContent = file.ownedAllocations.Allocate<uint8_t>(si->size);
679      sectionContent = llvm::MutableArrayRef<uint8_t>(sectContent, si->size);
680      normSect->content = sectionContent;
681    }
682    for (AtomInfo &ai : si->atomsAndOffsets) {
683      if (!ai.atom->size()) {
684        assert(ai.atom->begin() == ai.atom->end() &&
685               "Cannot have references without content");
686        continue;
687      }
688      auto atomContent = sectionContent.slice(ai.offsetInSection,
689                                              ai.atom->size());
690      _archHandler.generateAtomContent(*ai.atom, r, addrForAtom,
691                                       sectionAddrForAtom, _ctx.baseAddress(),
692                                       atomContent);
693    }
694  }
695}
696
697void Util::copySectionInfo(NormalizedFile &file) {
698  file.sections.reserve(_sectionInfos.size());
699  // Write sections grouped by segment.
700  for (SegmentInfo *sgi : _segmentInfos) {
701    for (SectionInfo *si : sgi->sections) {
702      appendSection(si, file);
703    }
704  }
705}
706
707void Util::updateSectionInfo(NormalizedFile &file) {
708  file.sections.reserve(_sectionInfos.size());
709  // sections grouped by segment.
710  for (SegmentInfo *sgi : _segmentInfos) {
711    Segment *normSeg = &file.segments[sgi->normalizedSegmentIndex];
712    normSeg->address = sgi->address;
713    normSeg->size = sgi->size;
714    for (SectionInfo *si : sgi->sections) {
715      Section *normSect = &file.sections[si->normalizedSectionIndex];
716      normSect->address = si->address;
717    }
718  }
719}
720
721void Util::copyEntryPointAddress(NormalizedFile &nFile) {
722  if (!_entryAtom) {
723    nFile.entryAddress = 0;
724    return;
725  }
726
727  if (_ctx.outputTypeHasEntry()) {
728    if (_archHandler.isThumbFunction(*_entryAtom))
729      nFile.entryAddress = (_atomToAddress[_entryAtom] | 1);
730    else
731      nFile.entryAddress = _atomToAddress[_entryAtom];
732  }
733}
734
735void Util::buildAtomToAddressMap() {
736  DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
737                   << "assign atom addresses:\n");
738  const bool lookForEntry = _ctx.outputTypeHasEntry();
739  for (SectionInfo *sect : _sectionInfos) {
740    for (const AtomInfo &info : sect->atomsAndOffsets) {
741      _atomToAddress[info.atom] = sect->address + info.offsetInSection;
742      if (lookForEntry && (info.atom->contentType() == DefinedAtom::typeCode) &&
743          (info.atom->size() != 0) &&
744          info.atom->name() == _ctx.entrySymbolName()) {
745        _entryAtom = info.atom;
746      }
747      DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
748                      << "   address="
749                      << llvm::format("0x%016X", _atomToAddress[info.atom])
750                      << llvm::format("    0x%09lX", info.atom)
751                      << ", file=#"
752                      << info.atom->file().ordinal()
753                      << ", atom=#"
754                      << info.atom->ordinal()
755                      << ", name="
756                      << info.atom->name()
757                      << ", type="
758                      << info.atom->contentType()
759                      << "\n");
760    }
761  }
762  DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
763                  << "assign header alias atom addresses:\n");
764  for (const Atom *atom : _machHeaderAliasAtoms) {
765    _atomToAddress[atom] = _ctx.baseAddress();
766#ifndef NDEBUG
767    if (auto *definedAtom = dyn_cast<DefinedAtom>(atom)) {
768      DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
769                      << "   address="
770                      << llvm::format("0x%016X", _atomToAddress[atom])
771                      << llvm::format("    0x%09lX", atom)
772                      << ", file=#"
773                      << definedAtom->file().ordinal()
774                      << ", atom=#"
775                      << definedAtom->ordinal()
776                      << ", name="
777                      << definedAtom->name()
778                      << ", type="
779                      << definedAtom->contentType()
780                      << "\n");
781    } else {
782      DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs()
783                      << "   address="
784                      << llvm::format("0x%016X", _atomToAddress[atom])
785                      << " atom=" << atom
786                      << " name=" << atom->name() << "\n");
787    }
788#endif
789  }
790}
791
792llvm::Error Util::synthesizeDebugNotes(NormalizedFile &file) {
793
794  // Bail out early if we don't need to generate a debug map.
795  if (_ctx.debugInfoMode() == MachOLinkingContext::DebugInfoMode::noDebugMap)
796    return llvm::Error::success();
797
798  std::vector<const DefinedAtom*> atomsNeedingDebugNotes;
799  std::set<const mach_o::MachOFile*> filesWithStabs;
800  bool objFileHasDwarf = false;
801  const File *objFile = nullptr;
802
803  for (SectionInfo *sect : _sectionInfos) {
804    for (const AtomInfo &info : sect->atomsAndOffsets) {
805      if (const DefinedAtom *atom = dyn_cast<DefinedAtom>(info.atom)) {
806
807        // FIXME: No stabs/debug-notes for symbols that wouldn't be in the
808        //        symbol table.
809        // FIXME: No stabs/debug-notes for kernel dtrace probes.
810
811        if (atom->contentType() == DefinedAtom::typeCFI ||
812            atom->contentType() == DefinedAtom::typeCString)
813          continue;
814
815        // Whenever we encounter a new file, update the 'objfileHasDwarf' flag.
816        if (&info.atom->file() != objFile) {
817          objFileHasDwarf = false;
818          if (const mach_o::MachOFile *atomFile =
819              dyn_cast<mach_o::MachOFile>(&info.atom->file())) {
820            if (atomFile->debugInfo()) {
821              if (isa<mach_o::DwarfDebugInfo>(atomFile->debugInfo()))
822                objFileHasDwarf = true;
823              else if (isa<mach_o::StabsDebugInfo>(atomFile->debugInfo()))
824                filesWithStabs.insert(atomFile);
825            }
826          }
827        }
828
829        // If this atom is from a file that needs dwarf, add it to the list.
830        if (objFileHasDwarf)
831          atomsNeedingDebugNotes.push_back(info.atom);
832      }
833    }
834  }
835
836  // Sort atoms needing debug notes by file ordinal, then atom ordinal.
837  std::sort(atomsNeedingDebugNotes.begin(), atomsNeedingDebugNotes.end(),
838            [](const DefinedAtom *lhs, const DefinedAtom *rhs) {
839              if (lhs->file().ordinal() != rhs->file().ordinal())
840                return (lhs->file().ordinal() < rhs->file().ordinal());
841              return (lhs->ordinal() < rhs->ordinal());
842            });
843
844  // FIXME: Handle <rdar://problem/17689030>: Add -add_ast_path option to \
845  //        linker which add N_AST stab entry to output
846  // See OutputFile::synthesizeDebugNotes in ObjectFile.cpp in ld64.
847
848  StringRef oldFileName = "";
849  StringRef oldDirPath = "";
850  bool wroteStartSO = false;
851  std::unordered_set<std::string> seenFiles;
852  for (const DefinedAtom *atom : atomsNeedingDebugNotes) {
853    const auto &atomFile = cast<mach_o::MachOFile>(atom->file());
854    assert(dyn_cast_or_null<lld::mach_o::DwarfDebugInfo>(atomFile.debugInfo())
855           && "file for atom needing debug notes does not contain dwarf");
856    auto &dwarf = cast<lld::mach_o::DwarfDebugInfo>(*atomFile.debugInfo());
857
858    auto &tu = dwarf.translationUnitSource();
859    StringRef newFileName = tu.name;
860    StringRef newDirPath = tu.path;
861
862    // Add an SO whenever the TU source file changes.
863    if (newFileName != oldFileName || newDirPath != oldDirPath) {
864      // Translation unit change, emit ending SO
865      if (oldFileName != "")
866        _stabs.push_back(mach_o::Stab(nullptr, N_SO, 1, 0, 0, ""));
867
868      oldFileName = newFileName;
869      oldDirPath = newDirPath;
870
871      // If newDirPath doesn't end with a '/' we need to add one:
872      if (newDirPath.back() != '/') {
873        char *p =
874          file.ownedAllocations.Allocate<char>(newDirPath.size() + 2);
875        memcpy(p, newDirPath.data(), newDirPath.size());
876        p[newDirPath.size()] = '/';
877        p[newDirPath.size() + 1] = '\0';
878        newDirPath = p;
879      }
880
881      // New translation unit, emit start SOs:
882      _stabs.push_back(mach_o::Stab(nullptr, N_SO, 0, 0, 0, newDirPath));
883      _stabs.push_back(mach_o::Stab(nullptr, N_SO, 0, 0, 0, newFileName));
884
885      // Synthesize OSO for start of file.
886      char *fullPath = nullptr;
887      {
888        SmallString<1024> pathBuf(atomFile.path());
889        if (auto EC = llvm::sys::fs::make_absolute(pathBuf))
890          return llvm::errorCodeToError(EC);
891        fullPath = file.ownedAllocations.Allocate<char>(pathBuf.size() + 1);
892        memcpy(fullPath, pathBuf.c_str(), pathBuf.size() + 1);
893      }
894
895      // Get mod time.
896      uint32_t modTime = 0;
897      llvm::sys::fs::file_status stat;
898      if (!llvm::sys::fs::status(fullPath, stat))
899        if (llvm::sys::fs::exists(stat))
900          modTime = llvm::sys::toTimeT(stat.getLastModificationTime());
901
902      _stabs.push_back(mach_o::Stab(nullptr, N_OSO, _ctx.getCPUSubType(), 1,
903                                    modTime, fullPath));
904      // <rdar://problem/6337329> linker should put cpusubtype in n_sect field
905      // of nlist entry for N_OSO debug note entries.
906      wroteStartSO = true;
907    }
908
909    if (atom->contentType() == DefinedAtom::typeCode) {
910      // Synthesize BNSYM and start FUN stabs.
911      _stabs.push_back(mach_o::Stab(atom, N_BNSYM, 1, 0, 0, ""));
912      _stabs.push_back(mach_o::Stab(atom, N_FUN, 1, 0, 0, atom->name()));
913      // Synthesize any SOL stabs needed
914      // FIXME: add SOL stabs.
915      _stabs.push_back(mach_o::Stab(nullptr, N_FUN, 0, 0,
916                                    atom->rawContent().size(), ""));
917      _stabs.push_back(mach_o::Stab(nullptr, N_ENSYM, 1, 0,
918                                    atom->rawContent().size(), ""));
919    } else {
920      if (atom->scope() == Atom::scopeTranslationUnit)
921        _stabs.push_back(mach_o::Stab(atom, N_STSYM, 1, 0, 0, atom->name()));
922      else
923        _stabs.push_back(mach_o::Stab(nullptr, N_GSYM, 1, 0, 0, atom->name()));
924    }
925  }
926
927  // Emit ending SO if necessary.
928  if (wroteStartSO)
929    _stabs.push_back(mach_o::Stab(nullptr, N_SO, 1, 0, 0, ""));
930
931  // Copy any stabs from .o file.
932  for (const auto *objFile : filesWithStabs) {
933    const auto &stabsList =
934      cast<mach_o::StabsDebugInfo>(objFile->debugInfo())->stabs();
935    for (auto &stab : stabsList) {
936      // FIXME: Drop stabs whose atoms have been dead-stripped.
937      _stabs.push_back(stab);
938    }
939  }
940
941  return llvm::Error::success();
942}
943
944uint16_t Util::descBits(const DefinedAtom* atom) {
945  uint16_t desc = 0;
946  switch (atom->merge()) {
947  case lld::DefinedAtom::mergeNo:
948  case lld::DefinedAtom::mergeAsTentative:
949    break;
950  case lld::DefinedAtom::mergeAsWeak:
951  case lld::DefinedAtom::mergeAsWeakAndAddressUsed:
952    desc |= N_WEAK_DEF;
953    break;
954  case lld::DefinedAtom::mergeSameNameAndSize:
955  case lld::DefinedAtom::mergeByLargestSection:
956  case lld::DefinedAtom::mergeByContent:
957    llvm_unreachable("Unsupported DefinedAtom::merge()");
958    break;
959  }
960  if (atom->contentType() == lld::DefinedAtom::typeResolver)
961    desc |= N_SYMBOL_RESOLVER;
962  if (atom->contentType() == lld::DefinedAtom::typeMachHeader)
963    desc |= REFERENCED_DYNAMICALLY;
964  if (_archHandler.isThumbFunction(*atom))
965    desc |= N_ARM_THUMB_DEF;
966  if (atom->deadStrip() == DefinedAtom::deadStripNever &&
967      _ctx.outputMachOType() == llvm::MachO::MH_OBJECT) {
968    if ((atom->contentType() != DefinedAtom::typeInitializerPtr)
969     && (atom->contentType() != DefinedAtom::typeTerminatorPtr))
970    desc |= N_NO_DEAD_STRIP;
971  }
972  return desc;
973}
974
975bool Util::AtomSorter::operator()(const AtomAndIndex &left,
976                                  const AtomAndIndex &right) {
977  return (left.atom->name().compare(right.atom->name()) < 0);
978}
979
980llvm::Error Util::getSymbolTableRegion(const DefinedAtom* atom,
981                                       bool &inGlobalsRegion,
982                                       SymbolScope &scope) {
983  bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT);
984  switch (atom->scope()) {
985  case Atom::scopeTranslationUnit:
986    scope = 0;
987    inGlobalsRegion = false;
988    return llvm::Error::success();
989  case Atom::scopeLinkageUnit:
990    if ((_ctx.exportMode() == MachOLinkingContext::ExportMode::whiteList) &&
991        _ctx.exportSymbolNamed(atom->name())) {
992      return llvm::make_error<GenericError>(
993                          Twine("cannot export hidden symbol ") + atom->name());
994    }
995    if (rMode) {
996      if (_ctx.keepPrivateExterns()) {
997        // -keep_private_externs means keep in globals region as N_PEXT.
998        scope = N_PEXT | N_EXT;
999        inGlobalsRegion = true;
1000        return llvm::Error::success();
1001      }
1002    }
1003    // scopeLinkageUnit symbols are no longer global once linked.
1004    scope = N_PEXT;
1005    inGlobalsRegion = false;
1006    return llvm::Error::success();
1007  case Atom::scopeGlobal:
1008    if (_ctx.exportRestrictMode()) {
1009      if (_ctx.exportSymbolNamed(atom->name())) {
1010        scope = N_EXT;
1011        inGlobalsRegion = true;
1012        return llvm::Error::success();
1013      } else {
1014        scope = N_PEXT;
1015        inGlobalsRegion = false;
1016        return llvm::Error::success();
1017      }
1018    } else {
1019      scope = N_EXT;
1020      inGlobalsRegion = true;
1021      return llvm::Error::success();
1022    }
1023    break;
1024  }
1025  llvm_unreachable("atom->scope() unknown enum value");
1026}
1027
1028
1029
1030llvm::Error Util::addSymbols(const lld::File &atomFile,
1031                             NormalizedFile &file) {
1032  bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT);
1033  // Mach-O symbol table has four regions: stabs, locals, globals, undefs.
1034
1035  // Add all stabs.
1036  for (auto &stab : _stabs) {
1037    Symbol sym;
1038    sym.type = static_cast<NListType>(stab.type);
1039    sym.scope = 0;
1040    sym.sect = stab.other;
1041    sym.desc = stab.desc;
1042    if (stab.atom)
1043      sym.value = _atomToAddress[stab.atom];
1044    else
1045      sym.value = stab.value;
1046    sym.name = stab.str;
1047    file.stabsSymbols.push_back(sym);
1048  }
1049
1050  // Add all local (non-global) symbols in address order
1051  std::vector<AtomAndIndex> globals;
1052  globals.reserve(512);
1053  for (SectionInfo *sect : _sectionInfos) {
1054    for (const AtomInfo &info : sect->atomsAndOffsets) {
1055      const DefinedAtom *atom = info.atom;
1056      if (!atom->name().empty()) {
1057        SymbolScope symbolScope;
1058        bool inGlobalsRegion;
1059        if (auto ec = getSymbolTableRegion(atom, inGlobalsRegion, symbolScope)){
1060          return ec;
1061        }
1062        if (inGlobalsRegion) {
1063          AtomAndIndex ai = { atom, sect->finalSectionIndex, symbolScope };
1064          globals.push_back(ai);
1065        } else {
1066          Symbol sym;
1067          sym.name  = atom->name();
1068          sym.type  = N_SECT;
1069          sym.scope = symbolScope;
1070          sym.sect  = sect->finalSectionIndex;
1071          sym.desc  = descBits(atom);
1072          sym.value = _atomToAddress[atom];
1073          _atomToSymbolIndex[atom] = file.localSymbols.size();
1074          file.localSymbols.push_back(sym);
1075        }
1076      } else if (rMode && _archHandler.needsLocalSymbolInRelocatableFile(atom)){
1077        // Create 'Lxxx' labels for anonymous atoms if archHandler says so.
1078        static unsigned tempNum = 1;
1079        char tmpName[16];
1080        sprintf(tmpName, "L%04u", tempNum++);
1081        StringRef tempRef(tmpName);
1082        Symbol sym;
1083        sym.name  = tempRef.copy(file.ownedAllocations);
1084        sym.type  = N_SECT;
1085        sym.scope = 0;
1086        sym.sect  = sect->finalSectionIndex;
1087        sym.desc  = 0;
1088        sym.value = _atomToAddress[atom];
1089        _atomToSymbolIndex[atom] = file.localSymbols.size();
1090        file.localSymbols.push_back(sym);
1091      }
1092    }
1093  }
1094
1095  // Sort global symbol alphabetically, then add to symbol table.
1096  std::sort(globals.begin(), globals.end(), AtomSorter());
1097  const uint32_t globalStartIndex = file.localSymbols.size();
1098  for (AtomAndIndex &ai : globals) {
1099    Symbol sym;
1100    sym.name  = ai.atom->name();
1101    sym.type  = N_SECT;
1102    sym.scope = ai.scope;
1103    sym.sect  = ai.index;
1104    sym.desc  = descBits(static_cast<const DefinedAtom*>(ai.atom));
1105    sym.value = _atomToAddress[ai.atom];
1106    _atomToSymbolIndex[ai.atom] = globalStartIndex + file.globalSymbols.size();
1107    file.globalSymbols.push_back(sym);
1108  }
1109
1110  // Sort undefined symbol alphabetically, then add to symbol table.
1111  std::vector<AtomAndIndex> undefs;
1112  undefs.reserve(128);
1113  for (const UndefinedAtom *atom : atomFile.undefined()) {
1114    AtomAndIndex ai = { atom, 0, N_EXT };
1115    undefs.push_back(ai);
1116  }
1117  for (const SharedLibraryAtom *atom : atomFile.sharedLibrary()) {
1118    AtomAndIndex ai = { atom, 0, N_EXT };
1119    undefs.push_back(ai);
1120  }
1121  std::sort(undefs.begin(), undefs.end(), AtomSorter());
1122  const uint32_t start = file.globalSymbols.size() + file.localSymbols.size();
1123  for (AtomAndIndex &ai : undefs) {
1124    Symbol sym;
1125    uint16_t desc = 0;
1126    if (!rMode) {
1127      uint8_t ordinal = 0;
1128      if (!_ctx.useFlatNamespace())
1129        ordinal = dylibOrdinal(dyn_cast<SharedLibraryAtom>(ai.atom));
1130      llvm::MachO::SET_LIBRARY_ORDINAL(desc, ordinal);
1131    }
1132    sym.name  = ai.atom->name();
1133    sym.type  = N_UNDF;
1134    sym.scope = ai.scope;
1135    sym.sect  = 0;
1136    sym.desc  = desc;
1137    sym.value = 0;
1138    _atomToSymbolIndex[ai.atom] = file.undefinedSymbols.size() + start;
1139    file.undefinedSymbols.push_back(sym);
1140  }
1141
1142  return llvm::Error::success();
1143}
1144
1145const Atom *Util::targetOfLazyPointer(const DefinedAtom *lpAtom) {
1146  for (const Reference *ref : *lpAtom) {
1147    if (_archHandler.isLazyPointer(*ref)) {
1148      return ref->target();
1149    }
1150  }
1151  return nullptr;
1152}
1153
1154const Atom *Util::targetOfStub(const DefinedAtom *stubAtom) {
1155  for (const Reference *ref : *stubAtom) {
1156    if (const Atom *ta = ref->target()) {
1157      if (const DefinedAtom *lpAtom = dyn_cast<DefinedAtom>(ta)) {
1158        const Atom *target = targetOfLazyPointer(lpAtom);
1159        if (target)
1160          return target;
1161      }
1162    }
1163  }
1164  return nullptr;
1165}
1166
1167void Util::addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file) {
1168  for (SectionInfo *si : _sectionInfos) {
1169    Section &normSect = file.sections[si->normalizedSectionIndex];
1170    switch (si->type) {
1171    case llvm::MachO::S_NON_LAZY_SYMBOL_POINTERS:
1172      for (const AtomInfo &info : si->atomsAndOffsets) {
1173        bool foundTarget = false;
1174        for (const Reference *ref : *info.atom) {
1175          const Atom *target = ref->target();
1176          if (target) {
1177            if (isa<const SharedLibraryAtom>(target)) {
1178              uint32_t index = _atomToSymbolIndex[target];
1179              normSect.indirectSymbols.push_back(index);
1180              foundTarget = true;
1181            } else {
1182              normSect.indirectSymbols.push_back(
1183                                            llvm::MachO::INDIRECT_SYMBOL_LOCAL);
1184            }
1185          }
1186        }
1187        if (!foundTarget) {
1188          normSect.indirectSymbols.push_back(
1189                                             llvm::MachO::INDIRECT_SYMBOL_ABS);
1190        }
1191      }
1192      break;
1193    case llvm::MachO::S_LAZY_SYMBOL_POINTERS:
1194      for (const AtomInfo &info : si->atomsAndOffsets) {
1195        const Atom *target = targetOfLazyPointer(info.atom);
1196        if (target) {
1197          uint32_t index = _atomToSymbolIndex[target];
1198          normSect.indirectSymbols.push_back(index);
1199        }
1200      }
1201      break;
1202    case llvm::MachO::S_SYMBOL_STUBS:
1203      for (const AtomInfo &info : si->atomsAndOffsets) {
1204        const Atom *target = targetOfStub(info.atom);
1205        if (target) {
1206          uint32_t index = _atomToSymbolIndex[target];
1207          normSect.indirectSymbols.push_back(index);
1208        }
1209      }
1210      break;
1211    default:
1212      break;
1213    }
1214  }
1215}
1216
1217void Util::addDependentDylibs(const lld::File &atomFile,
1218                              NormalizedFile &nFile) {
1219  // Scan all imported symbols and build up list of dylibs they are from.
1220  int ordinal = 1;
1221  for (const auto *dylib : _ctx.allDylibs()) {
1222    DylibPathToInfo::iterator pos = _dylibInfo.find(dylib->installName());
1223    if (pos == _dylibInfo.end()) {
1224      DylibInfo info;
1225      bool flatNamespaceAtom = dylib == _ctx.flatNamespaceFile();
1226
1227      // If we're in -flat_namespace mode (or this atom came from the flat
1228      // namespace file under -undefined dynamic_lookup) then use the flat
1229      // lookup ordinal.
1230      if (flatNamespaceAtom || _ctx.useFlatNamespace())
1231        info.ordinal = BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
1232      else
1233        info.ordinal = ordinal++;
1234      info.hasWeak = false;
1235      info.hasNonWeak = !info.hasWeak;
1236      _dylibInfo[dylib->installName()] = info;
1237
1238      // Unless this was a flat_namespace atom, record the source dylib.
1239      if (!flatNamespaceAtom) {
1240        DependentDylib depInfo;
1241        depInfo.path = dylib->installName();
1242        depInfo.kind = llvm::MachO::LC_LOAD_DYLIB;
1243        depInfo.currentVersion = _ctx.dylibCurrentVersion(dylib->path());
1244        depInfo.compatVersion = _ctx.dylibCompatVersion(dylib->path());
1245        nFile.dependentDylibs.push_back(depInfo);
1246      }
1247    } else {
1248      pos->second.hasWeak = false;
1249      pos->second.hasNonWeak = !pos->second.hasWeak;
1250    }
1251  }
1252  // Automatically weak link dylib in which all symbols are weak (canBeNull).
1253  for (DependentDylib &dep : nFile.dependentDylibs) {
1254    DylibInfo &info = _dylibInfo[dep.path];
1255    if (info.hasWeak && !info.hasNonWeak)
1256      dep.kind = llvm::MachO::LC_LOAD_WEAK_DYLIB;
1257    else if (_ctx.isUpwardDylib(dep.path))
1258      dep.kind = llvm::MachO::LC_LOAD_UPWARD_DYLIB;
1259  }
1260}
1261
1262int Util::dylibOrdinal(const SharedLibraryAtom *sa) {
1263  return _dylibInfo[sa->loadName()].ordinal;
1264}
1265
1266void Util::segIndexForSection(const SectionInfo *sect, uint8_t &segmentIndex,
1267                                                  uint64_t &segmentStartAddr) {
1268  segmentIndex = 0;
1269  for (const SegmentInfo *seg : _segmentInfos) {
1270    if ((seg->address <= sect->address)
1271      && (seg->address+seg->size >= sect->address+sect->size)) {
1272      segmentStartAddr = seg->address;
1273      return;
1274    }
1275    ++segmentIndex;
1276  }
1277  llvm_unreachable("section not in any segment");
1278}
1279
1280uint32_t Util::sectionIndexForAtom(const Atom *atom) {
1281  uint64_t address = _atomToAddress[atom];
1282  for (const SectionInfo *si : _sectionInfos) {
1283    if ((si->address <= address) && (address < si->address+si->size))
1284      return si->finalSectionIndex;
1285  }
1286  llvm_unreachable("atom not in any section");
1287}
1288
1289void Util::addSectionRelocs(const lld::File &, NormalizedFile &file) {
1290  if (_ctx.outputMachOType() != llvm::MachO::MH_OBJECT)
1291    return;
1292
1293  // Utility function for ArchHandler to find symbol index for an atom.
1294  auto symIndexForAtom = [&] (const Atom &atom) -> uint32_t {
1295    auto pos = _atomToSymbolIndex.find(&atom);
1296    assert(pos != _atomToSymbolIndex.end());
1297    return pos->second;
1298  };
1299
1300  // Utility function for ArchHandler to find section index for an atom.
1301  auto sectIndexForAtom = [&] (const Atom &atom) -> uint32_t {
1302    return sectionIndexForAtom(&atom);
1303  };
1304
1305  // Utility function for ArchHandler to find address of atom in output file.
1306  auto addressForAtom = [&] (const Atom &atom) -> uint64_t {
1307    auto pos = _atomToAddress.find(&atom);
1308    assert(pos != _atomToAddress.end());
1309    return pos->second;
1310  };
1311
1312  for (SectionInfo *si : _sectionInfos) {
1313    Section &normSect = file.sections[si->normalizedSectionIndex];
1314    for (const AtomInfo &info : si->atomsAndOffsets) {
1315      const DefinedAtom *atom = info.atom;
1316      for (const Reference *ref : *atom) {
1317        // Skip emitting relocs for sections which are always able to be
1318        // implicitly regenerated and where the relocation targets an address
1319        // which is defined.
1320        if (si->relocsToDefinedCanBeImplicit && isa<DefinedAtom>(ref->target()))
1321          continue;
1322        _archHandler.appendSectionRelocations(*atom, info.offsetInSection, *ref,
1323                                              symIndexForAtom,
1324                                              sectIndexForAtom,
1325                                              addressForAtom,
1326                                              normSect.relocations);
1327      }
1328    }
1329  }
1330}
1331
1332void Util::addFunctionStarts(const lld::File &, NormalizedFile &file) {
1333  if (!_ctx.generateFunctionStartsLoadCommand())
1334    return;
1335  file.functionStarts.reserve(8192);
1336  // Delta compress function starts, starting with the mach header symbol.
1337  const uint64_t badAddress = ~0ULL;
1338  uint64_t addr = badAddress;
1339  for (SectionInfo *si : _sectionInfos) {
1340    for (const AtomInfo &info : si->atomsAndOffsets) {
1341      auto type = info.atom->contentType();
1342      if (type == DefinedAtom::typeMachHeader) {
1343        addr = _atomToAddress[info.atom];
1344        continue;
1345      }
1346      if (type != DefinedAtom::typeCode)
1347        continue;
1348      assert(addr != badAddress && "Missing mach header symbol");
1349      // Skip atoms which have 0 size.  This is so that LC_FUNCTION_STARTS
1350      // can't spill in to the next section.
1351      if (!info.atom->size())
1352        continue;
1353      uint64_t nextAddr = _atomToAddress[info.atom];
1354      if (_archHandler.isThumbFunction(*info.atom))
1355        nextAddr |= 1;
1356      uint64_t delta = nextAddr - addr;
1357      if (delta) {
1358        ByteBuffer buffer;
1359        buffer.append_uleb128(delta);
1360        file.functionStarts.insert(file.functionStarts.end(), buffer.bytes(),
1361                                   buffer.bytes() + buffer.size());
1362      }
1363      addr = nextAddr;
1364    }
1365  }
1366
1367  // Null terminate, and pad to pointer size for this arch.
1368  file.functionStarts.push_back(0);
1369
1370  auto size = file.functionStarts.size();
1371  for (unsigned i = size, e = llvm::alignTo(size, _ctx.is64Bit() ? 8 : 4);
1372       i != e; ++i)
1373    file.functionStarts.push_back(0);
1374}
1375
1376void Util::buildDataInCodeArray(const lld::File &, NormalizedFile &file) {
1377  if (!_ctx.generateDataInCodeLoadCommand())
1378    return;
1379  for (SectionInfo *si : _sectionInfos) {
1380    for (const AtomInfo &info : si->atomsAndOffsets) {
1381      // Atoms that contain data-in-code have "transition" references
1382      // which mark a point where the embedded data starts of ends.
1383      // This needs to be converted to the mach-o format which is an array
1384      // of data-in-code ranges.
1385      uint32_t startOffset = 0;
1386      DataRegionType mode = DataRegionType(0);
1387      for (const Reference *ref : *info.atom) {
1388        if (ref->kindNamespace() != Reference::KindNamespace::mach_o)
1389          continue;
1390        if (_archHandler.isDataInCodeTransition(ref->kindValue())) {
1391          DataRegionType nextMode = (DataRegionType)ref->addend();
1392          if (mode != nextMode) {
1393            if (mode != 0) {
1394              // Found end data range, so make range entry.
1395              DataInCode entry;
1396              entry.offset = si->address + info.offsetInSection + startOffset;
1397              entry.length = ref->offsetInAtom() - startOffset;
1398              entry.kind   = mode;
1399              file.dataInCode.push_back(entry);
1400            }
1401          }
1402          mode = nextMode;
1403          startOffset = ref->offsetInAtom();
1404        }
1405      }
1406      if (mode != 0) {
1407        // Function ends with data (no end transition).
1408        DataInCode entry;
1409        entry.offset = si->address + info.offsetInSection + startOffset;
1410        entry.length = info.atom->size() - startOffset;
1411        entry.kind   = mode;
1412        file.dataInCode.push_back(entry);
1413      }
1414    }
1415  }
1416}
1417
1418void Util::addRebaseAndBindingInfo(const lld::File &atomFile,
1419                                                        NormalizedFile &nFile) {
1420  if (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT)
1421    return;
1422
1423  uint8_t segmentIndex;
1424  uint64_t segmentStartAddr;
1425  for (SectionInfo *sect : _sectionInfos) {
1426    segIndexForSection(sect, segmentIndex, segmentStartAddr);
1427    for (const AtomInfo &info : sect->atomsAndOffsets) {
1428      const DefinedAtom *atom = info.atom;
1429      for (const Reference *ref : *atom) {
1430        uint64_t segmentOffset = _atomToAddress[atom] + ref->offsetInAtom()
1431                                - segmentStartAddr;
1432        const Atom* targ = ref->target();
1433        if (_archHandler.isPointer(*ref)) {
1434          // A pointer to a DefinedAtom requires rebasing.
1435          if (isa<DefinedAtom>(targ)) {
1436            RebaseLocation rebase;
1437            rebase.segIndex = segmentIndex;
1438            rebase.segOffset = segmentOffset;
1439            rebase.kind = llvm::MachO::REBASE_TYPE_POINTER;
1440            nFile.rebasingInfo.push_back(rebase);
1441          }
1442          // A pointer to an SharedLibraryAtom requires binding.
1443          if (const SharedLibraryAtom *sa = dyn_cast<SharedLibraryAtom>(targ)) {
1444            BindLocation bind;
1445            bind.segIndex = segmentIndex;
1446            bind.segOffset = segmentOffset;
1447            bind.kind = llvm::MachO::BIND_TYPE_POINTER;
1448            bind.canBeNull = sa->canBeNullAtRuntime();
1449            bind.ordinal = dylibOrdinal(sa);
1450            bind.symbolName = targ->name();
1451            bind.addend = ref->addend();
1452            nFile.bindingInfo.push_back(bind);
1453          }
1454        }
1455        else if (_archHandler.isLazyPointer(*ref)) {
1456          BindLocation bind;
1457          if (const SharedLibraryAtom *sa = dyn_cast<SharedLibraryAtom>(targ)) {
1458            bind.ordinal = dylibOrdinal(sa);
1459          } else {
1460            bind.ordinal = llvm::MachO::BIND_SPECIAL_DYLIB_SELF;
1461          }
1462          bind.segIndex = segmentIndex;
1463          bind.segOffset = segmentOffset;
1464          bind.kind = llvm::MachO::BIND_TYPE_POINTER;
1465          bind.canBeNull = false; //sa->canBeNullAtRuntime();
1466          bind.symbolName = targ->name();
1467          bind.addend = ref->addend();
1468          nFile.lazyBindingInfo.push_back(bind);
1469        }
1470      }
1471    }
1472  }
1473}
1474
1475void Util::addExportInfo(const lld::File &atomFile, NormalizedFile &nFile) {
1476  if (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT)
1477    return;
1478
1479  for (SectionInfo *sect : _sectionInfos) {
1480    for (const AtomInfo &info : sect->atomsAndOffsets) {
1481      const DefinedAtom *atom = info.atom;
1482      if (atom->scope() != Atom::scopeGlobal)
1483        continue;
1484      if (_ctx.exportRestrictMode()) {
1485        if (!_ctx.exportSymbolNamed(atom->name()))
1486          continue;
1487      }
1488      Export exprt;
1489      exprt.name = atom->name();
1490      exprt.offset = _atomToAddress[atom] - _ctx.baseAddress();
1491      exprt.kind = EXPORT_SYMBOL_FLAGS_KIND_REGULAR;
1492      if (atom->merge() == DefinedAtom::mergeAsWeak)
1493        exprt.flags = EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
1494      else
1495        exprt.flags = 0;
1496      exprt.otherOffset = 0;
1497      exprt.otherName = StringRef();
1498      nFile.exportInfo.push_back(exprt);
1499    }
1500  }
1501}
1502
1503uint32_t Util::fileFlags() {
1504  // FIXME: these need to determined at runtime.
1505  if (_ctx.outputMachOType() == MH_OBJECT) {
1506    return _subsectionsViaSymbols ? MH_SUBSECTIONS_VIA_SYMBOLS : 0;
1507  } else {
1508    uint32_t flags = MH_DYLDLINK;
1509    if (!_ctx.useFlatNamespace())
1510        flags |= MH_TWOLEVEL | MH_NOUNDEFS;
1511    if ((_ctx.outputMachOType() == MH_EXECUTE) && _ctx.PIE())
1512      flags |= MH_PIE;
1513    if (_hasTLVDescriptors)
1514      flags |= (MH_PIE | MH_HAS_TLV_DESCRIPTORS);
1515    return flags;
1516  }
1517}
1518
1519} // end anonymous namespace
1520
1521namespace lld {
1522namespace mach_o {
1523namespace normalized {
1524
1525/// Convert a set of Atoms into a normalized mach-o file.
1526llvm::Expected<std::unique_ptr<NormalizedFile>>
1527normalizedFromAtoms(const lld::File &atomFile,
1528                                           const MachOLinkingContext &context) {
1529  // The util object buffers info until the normalized file can be made.
1530  Util util(context);
1531  util.processDefinedAtoms(atomFile);
1532  util.organizeSections();
1533
1534  std::unique_ptr<NormalizedFile> f(new NormalizedFile());
1535  NormalizedFile &normFile = *f.get();
1536  normFile.arch = context.arch();
1537  normFile.fileType = context.outputMachOType();
1538  normFile.flags = util.fileFlags();
1539  normFile.stackSize = context.stackSize();
1540  normFile.installName = context.installName();
1541  normFile.currentVersion = context.currentVersion();
1542  normFile.compatVersion = context.compatibilityVersion();
1543  normFile.os = context.os();
1544
1545  // If we are emitting an object file, then the min version is the maximum
1546  // of the min's of all the source files and the cmdline.
1547  if (normFile.fileType == llvm::MachO::MH_OBJECT)
1548    normFile.minOSverson = std::max(context.osMinVersion(), util.minVersion());
1549  else
1550    normFile.minOSverson = context.osMinVersion();
1551
1552  normFile.minOSVersionKind = util.minVersionCommandType();
1553
1554  normFile.sdkVersion = context.sdkVersion();
1555  normFile.sourceVersion = context.sourceVersion();
1556
1557  if (context.generateVersionLoadCommand() &&
1558      context.os() != MachOLinkingContext::OS::unknown)
1559    normFile.hasMinVersionLoadCommand = true;
1560  else if (normFile.fileType == llvm::MachO::MH_OBJECT &&
1561           util.allSourceFilesHaveMinVersions() &&
1562           ((normFile.os != MachOLinkingContext::OS::unknown) ||
1563            util.minVersionCommandType())) {
1564    // If we emit an object file, then it should contain a min version load
1565    // command if all of the source files also contained min version commands.
1566    // Also, we either need to have a platform, or found a platform from the
1567    // source object files.
1568    normFile.hasMinVersionLoadCommand = true;
1569  }
1570  normFile.generateDataInCodeLoadCommand =
1571    context.generateDataInCodeLoadCommand();
1572  normFile.pageSize = context.pageSize();
1573  normFile.rpaths = context.rpaths();
1574  util.addDependentDylibs(atomFile, normFile);
1575  util.copySegmentInfo(normFile);
1576  util.copySectionInfo(normFile);
1577  util.assignAddressesToSections(normFile);
1578  util.buildAtomToAddressMap();
1579  if (auto err = util.synthesizeDebugNotes(normFile))
1580    return std::move(err);
1581  util.updateSectionInfo(normFile);
1582  util.copySectionContent(normFile);
1583  if (auto ec = util.addSymbols(atomFile, normFile)) {
1584    return std::move(ec);
1585  }
1586  util.addIndirectSymbols(atomFile, normFile);
1587  util.addRebaseAndBindingInfo(atomFile, normFile);
1588  util.addExportInfo(atomFile, normFile);
1589  util.addSectionRelocs(atomFile, normFile);
1590  util.addFunctionStarts(atomFile, normFile);
1591  util.buildDataInCodeArray(atomFile, normFile);
1592  util.copyEntryPointAddress(normFile);
1593
1594  return std::move(f);
1595}
1596
1597} // namespace normalized
1598} // namespace mach_o
1599} // namespace lld
1600