MachObjectWriter.cpp revision 221345
1//===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "llvm/MC/MCMachObjectWriter.h"
11#include "llvm/ADT/OwningPtr.h"
12#include "llvm/ADT/StringMap.h"
13#include "llvm/ADT/Twine.h"
14#include "llvm/MC/MCAssembler.h"
15#include "llvm/MC/MCAsmLayout.h"
16#include "llvm/MC/MCExpr.h"
17#include "llvm/MC/MCObjectWriter.h"
18#include "llvm/MC/MCSectionMachO.h"
19#include "llvm/MC/MCSymbol.h"
20#include "llvm/MC/MCMachOSymbolFlags.h"
21#include "llvm/MC/MCValue.h"
22#include "llvm/Object/MachOFormat.h"
23#include "llvm/Support/ErrorHandling.h"
24#include "llvm/Target/TargetAsmBackend.h"
25
26// FIXME: Gross.
27#include "../Target/ARM/ARMFixupKinds.h"
28#include "../Target/X86/X86FixupKinds.h"
29
30#include <vector>
31using namespace llvm;
32using namespace llvm::object;
33
34// FIXME: this has been copied from (or to) X86AsmBackend.cpp
35static unsigned getFixupKindLog2Size(unsigned Kind) {
36  switch (Kind) {
37  default:
38    llvm_unreachable("invalid fixup kind!");
39  case FK_PCRel_1:
40  case FK_Data_1: return 0;
41  case FK_PCRel_2:
42  case FK_Data_2: return 1;
43  case FK_PCRel_4:
44    // FIXME: Remove these!!!
45  case X86::reloc_riprel_4byte:
46  case X86::reloc_riprel_4byte_movq_load:
47  case X86::reloc_signed_4byte:
48  case FK_Data_4: return 2;
49  case FK_Data_8: return 3;
50  }
51}
52
53static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) {
54  // Undefined symbols are always extern.
55  if (SD->Symbol->isUndefined())
56    return true;
57
58  // References to weak definitions require external relocation entries; the
59  // definition may not always be the one in the same object file.
60  if (SD->getFlags() & SF_WeakDefinition)
61    return true;
62
63  // Otherwise, we can use an internal relocation.
64  return false;
65}
66
67namespace {
68
69class MachObjectWriter : public MCObjectWriter {
70  /// MachSymbolData - Helper struct for containing some precomputed information
71  /// on symbols.
72  struct MachSymbolData {
73    MCSymbolData *SymbolData;
74    uint64_t StringIndex;
75    uint8_t SectionIndex;
76
77    // Support lexicographic sorting.
78    bool operator<(const MachSymbolData &RHS) const {
79      return SymbolData->getSymbol().getName() <
80             RHS.SymbolData->getSymbol().getName();
81    }
82  };
83
84  /// The target specific Mach-O writer instance.
85  llvm::OwningPtr<MCMachObjectTargetWriter> TargetObjectWriter;
86
87  /// @name Relocation Data
88  /// @{
89
90  llvm::DenseMap<const MCSectionData*,
91                 std::vector<macho::RelocationEntry> > Relocations;
92  llvm::DenseMap<const MCSectionData*, unsigned> IndirectSymBase;
93
94  /// @}
95  /// @name Symbol Table Data
96  /// @{
97
98  SmallString<256> StringTable;
99  std::vector<MachSymbolData> LocalSymbolData;
100  std::vector<MachSymbolData> ExternalSymbolData;
101  std::vector<MachSymbolData> UndefinedSymbolData;
102
103  /// @}
104
105private:
106  /// @name Utility Methods
107  /// @{
108
109  bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
110    const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
111      (MCFixupKind) Kind);
112
113    return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
114  }
115
116  /// @}
117
118  SectionAddrMap SectionAddress;
119  uint64_t getSectionAddress(const MCSectionData* SD) const {
120    return SectionAddress.lookup(SD);
121  }
122  uint64_t getSymbolAddress(const MCSymbolData* SD,
123                            const MCAsmLayout &Layout) const {
124    const MCSymbol &S = SD->getSymbol();
125
126    // If this is a variable, then recursively evaluate now.
127    if (S.isVariable()) {
128      MCValue Target;
129      if (!S.getVariableValue()->EvaluateAsRelocatable(Target, Layout))
130        report_fatal_error("unable to evaluate offset for variable '" +
131                           S.getName() + "'");
132
133      // Verify that any used symbols are defined.
134      if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
135        report_fatal_error("unable to evaluate offset to undefined symbol '" +
136                           Target.getSymA()->getSymbol().getName() + "'");
137      if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
138        report_fatal_error("unable to evaluate offset to undefined symbol '" +
139                           Target.getSymB()->getSymbol().getName() + "'");
140
141      uint64_t Address = Target.getConstant();
142      if (Target.getSymA())
143        Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
144                                      Target.getSymA()->getSymbol()), Layout);
145      if (Target.getSymB())
146        Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
147                                      Target.getSymB()->getSymbol()), Layout);
148      return Address;
149    }
150
151    return getSectionAddress(SD->getFragment()->getParent()) +
152      Layout.getSymbolOffset(SD);
153  }
154  uint64_t getFragmentAddress(const MCFragment *Fragment,
155                            const MCAsmLayout &Layout) const {
156    return getSectionAddress(Fragment->getParent()) +
157      Layout.getFragmentOffset(Fragment);
158  }
159
160  uint64_t getPaddingSize(const MCSectionData *SD,
161                          const MCAsmLayout &Layout) const {
162    uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD);
163    unsigned Next = SD->getLayoutOrder() + 1;
164    if (Next >= Layout.getSectionOrder().size())
165      return 0;
166
167    const MCSectionData &NextSD = *Layout.getSectionOrder()[Next];
168    if (NextSD.getSection().isVirtualSection())
169      return 0;
170    return OffsetToAlignment(EndAddr, NextSD.getAlignment());
171  }
172
173public:
174  MachObjectWriter(MCMachObjectTargetWriter *MOTW, raw_ostream &_OS,
175                   bool _IsLittleEndian)
176    : MCObjectWriter(_OS, _IsLittleEndian), TargetObjectWriter(MOTW) {
177  }
178
179  /// @name Target Writer Proxy Accessors
180  /// @{
181
182  bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
183  bool isARM() const {
184    uint32_t CPUType = TargetObjectWriter->getCPUType() & ~mach::CTFM_ArchMask;
185    return CPUType == mach::CTM_ARM;
186  }
187
188  /// @}
189
190  void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize,
191                   bool SubsectionsViaSymbols) {
192    uint32_t Flags = 0;
193
194    if (SubsectionsViaSymbols)
195      Flags |= macho::HF_SubsectionsViaSymbols;
196
197    // struct mach_header (28 bytes) or
198    // struct mach_header_64 (32 bytes)
199
200    uint64_t Start = OS.tell();
201    (void) Start;
202
203    Write32(is64Bit() ? macho::HM_Object64 : macho::HM_Object32);
204
205    Write32(TargetObjectWriter->getCPUType());
206    Write32(TargetObjectWriter->getCPUSubtype());
207
208    Write32(macho::HFT_Object);
209    Write32(NumLoadCommands);
210    Write32(LoadCommandsSize);
211    Write32(Flags);
212    if (is64Bit())
213      Write32(0); // reserved
214
215    assert(OS.tell() - Start ==
216           (is64Bit() ? macho::Header64Size : macho::Header32Size));
217  }
218
219  /// WriteSegmentLoadCommand - Write a segment load command.
220  ///
221  /// \arg NumSections - The number of sections in this segment.
222  /// \arg SectionDataSize - The total size of the sections.
223  void WriteSegmentLoadCommand(unsigned NumSections,
224                               uint64_t VMSize,
225                               uint64_t SectionDataStartOffset,
226                               uint64_t SectionDataSize) {
227    // struct segment_command (56 bytes) or
228    // struct segment_command_64 (72 bytes)
229
230    uint64_t Start = OS.tell();
231    (void) Start;
232
233    unsigned SegmentLoadCommandSize =
234      is64Bit() ? macho::SegmentLoadCommand64Size:
235      macho::SegmentLoadCommand32Size;
236    Write32(is64Bit() ? macho::LCT_Segment64 : macho::LCT_Segment);
237    Write32(SegmentLoadCommandSize +
238            NumSections * (is64Bit() ? macho::Section64Size :
239                           macho::Section32Size));
240
241    WriteBytes("", 16);
242    if (is64Bit()) {
243      Write64(0); // vmaddr
244      Write64(VMSize); // vmsize
245      Write64(SectionDataStartOffset); // file offset
246      Write64(SectionDataSize); // file size
247    } else {
248      Write32(0); // vmaddr
249      Write32(VMSize); // vmsize
250      Write32(SectionDataStartOffset); // file offset
251      Write32(SectionDataSize); // file size
252    }
253    Write32(0x7); // maxprot
254    Write32(0x7); // initprot
255    Write32(NumSections);
256    Write32(0); // flags
257
258    assert(OS.tell() - Start == SegmentLoadCommandSize);
259  }
260
261  void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
262                    const MCSectionData &SD, uint64_t FileOffset,
263                    uint64_t RelocationsStart, unsigned NumRelocations) {
264    uint64_t SectionSize = Layout.getSectionAddressSize(&SD);
265
266    // The offset is unused for virtual sections.
267    if (SD.getSection().isVirtualSection()) {
268      assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!");
269      FileOffset = 0;
270    }
271
272    // struct section (68 bytes) or
273    // struct section_64 (80 bytes)
274
275    uint64_t Start = OS.tell();
276    (void) Start;
277
278    const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection());
279    WriteBytes(Section.getSectionName(), 16);
280    WriteBytes(Section.getSegmentName(), 16);
281    if (is64Bit()) {
282      Write64(getSectionAddress(&SD)); // address
283      Write64(SectionSize); // size
284    } else {
285      Write32(getSectionAddress(&SD)); // address
286      Write32(SectionSize); // size
287    }
288    Write32(FileOffset);
289
290    unsigned Flags = Section.getTypeAndAttributes();
291    if (SD.hasInstructions())
292      Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS;
293
294    assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!");
295    Write32(Log2_32(SD.getAlignment()));
296    Write32(NumRelocations ? RelocationsStart : 0);
297    Write32(NumRelocations);
298    Write32(Flags);
299    Write32(IndirectSymBase.lookup(&SD)); // reserved1
300    Write32(Section.getStubSize()); // reserved2
301    if (is64Bit())
302      Write32(0); // reserved3
303
304    assert(OS.tell() - Start == (is64Bit() ? macho::Section64Size :
305           macho::Section32Size));
306  }
307
308  void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols,
309                              uint32_t StringTableOffset,
310                              uint32_t StringTableSize) {
311    // struct symtab_command (24 bytes)
312
313    uint64_t Start = OS.tell();
314    (void) Start;
315
316    Write32(macho::LCT_Symtab);
317    Write32(macho::SymtabLoadCommandSize);
318    Write32(SymbolOffset);
319    Write32(NumSymbols);
320    Write32(StringTableOffset);
321    Write32(StringTableSize);
322
323    assert(OS.tell() - Start == macho::SymtabLoadCommandSize);
324  }
325
326  void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
327                                uint32_t NumLocalSymbols,
328                                uint32_t FirstExternalSymbol,
329                                uint32_t NumExternalSymbols,
330                                uint32_t FirstUndefinedSymbol,
331                                uint32_t NumUndefinedSymbols,
332                                uint32_t IndirectSymbolOffset,
333                                uint32_t NumIndirectSymbols) {
334    // struct dysymtab_command (80 bytes)
335
336    uint64_t Start = OS.tell();
337    (void) Start;
338
339    Write32(macho::LCT_Dysymtab);
340    Write32(macho::DysymtabLoadCommandSize);
341    Write32(FirstLocalSymbol);
342    Write32(NumLocalSymbols);
343    Write32(FirstExternalSymbol);
344    Write32(NumExternalSymbols);
345    Write32(FirstUndefinedSymbol);
346    Write32(NumUndefinedSymbols);
347    Write32(0); // tocoff
348    Write32(0); // ntoc
349    Write32(0); // modtaboff
350    Write32(0); // nmodtab
351    Write32(0); // extrefsymoff
352    Write32(0); // nextrefsyms
353    Write32(IndirectSymbolOffset);
354    Write32(NumIndirectSymbols);
355    Write32(0); // extreloff
356    Write32(0); // nextrel
357    Write32(0); // locreloff
358    Write32(0); // nlocrel
359
360    assert(OS.tell() - Start == macho::DysymtabLoadCommandSize);
361  }
362
363  void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout) {
364    MCSymbolData &Data = *MSD.SymbolData;
365    const MCSymbol &Symbol = Data.getSymbol();
366    uint8_t Type = 0;
367    uint16_t Flags = Data.getFlags();
368    uint32_t Address = 0;
369
370    // Set the N_TYPE bits. See <mach-o/nlist.h>.
371    //
372    // FIXME: Are the prebound or indirect fields possible here?
373    if (Symbol.isUndefined())
374      Type = macho::STT_Undefined;
375    else if (Symbol.isAbsolute())
376      Type = macho::STT_Absolute;
377    else
378      Type = macho::STT_Section;
379
380    // FIXME: Set STAB bits.
381
382    if (Data.isPrivateExtern())
383      Type |= macho::STF_PrivateExtern;
384
385    // Set external bit.
386    if (Data.isExternal() || Symbol.isUndefined())
387      Type |= macho::STF_External;
388
389    // Compute the symbol address.
390    if (Symbol.isDefined()) {
391      if (Symbol.isAbsolute()) {
392        Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue();
393      } else {
394        Address = getSymbolAddress(&Data, Layout);
395      }
396    } else if (Data.isCommon()) {
397      // Common symbols are encoded with the size in the address
398      // field, and their alignment in the flags.
399      Address = Data.getCommonSize();
400
401      // Common alignment is packed into the 'desc' bits.
402      if (unsigned Align = Data.getCommonAlignment()) {
403        unsigned Log2Size = Log2_32(Align);
404        assert((1U << Log2Size) == Align && "Invalid 'common' alignment!");
405        if (Log2Size > 15)
406          report_fatal_error("invalid 'common' alignment '" +
407                            Twine(Align) + "'");
408        // FIXME: Keep this mask with the SymbolFlags enumeration.
409        Flags = (Flags & 0xF0FF) | (Log2Size << 8);
410      }
411    }
412
413    // struct nlist (12 bytes)
414
415    Write32(MSD.StringIndex);
416    Write8(Type);
417    Write8(MSD.SectionIndex);
418
419    // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
420    // value.
421    Write16(Flags);
422    if (is64Bit())
423      Write64(Address);
424    else
425      Write32(Address);
426  }
427
428  // FIXME: We really need to improve the relocation validation. Basically, we
429  // want to implement a separate computation which evaluates the relocation
430  // entry as the linker would, and verifies that the resultant fixup value is
431  // exactly what the encoder wanted. This will catch several classes of
432  // problems:
433  //
434  //  - Relocation entry bugs, the two algorithms are unlikely to have the same
435  //    exact bug.
436  //
437  //  - Relaxation issues, where we forget to relax something.
438  //
439  //  - Input errors, where something cannot be correctly encoded. 'as' allows
440  //    these through in many cases.
441
442  static bool isFixupKindRIPRel(unsigned Kind) {
443    return Kind == X86::reloc_riprel_4byte ||
444      Kind == X86::reloc_riprel_4byte_movq_load;
445  }
446  void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
447                              const MCFragment *Fragment,
448                              const MCFixup &Fixup, MCValue Target,
449                              uint64_t &FixedValue) {
450    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
451    unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
452    unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
453
454    // See <reloc.h>.
455    uint32_t FixupOffset =
456      Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
457    uint32_t FixupAddress =
458      getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
459    int64_t Value = 0;
460    unsigned Index = 0;
461    unsigned IsExtern = 0;
462    unsigned Type = 0;
463
464    Value = Target.getConstant();
465
466    if (IsPCRel) {
467      // Compensate for the relocation offset, Darwin x86_64 relocations only
468      // have the addend and appear to have attempted to define it to be the
469      // actual expression addend without the PCrel bias. However, instructions
470      // with data following the relocation are not accommodated for (see comment
471      // below regarding SIGNED{1,2,4}), so it isn't exactly that either.
472      Value += 1LL << Log2Size;
473    }
474
475    if (Target.isAbsolute()) { // constant
476      // SymbolNum of 0 indicates the absolute section.
477      Type = macho::RIT_X86_64_Unsigned;
478      Index = 0;
479
480      // FIXME: I believe this is broken, I don't think the linker can
481      // understand it. I think it would require a local relocation, but I'm not
482      // sure if that would work either. The official way to get an absolute
483      // PCrel relocation is to use an absolute symbol (which we don't support
484      // yet).
485      if (IsPCRel) {
486        IsExtern = 1;
487        Type = macho::RIT_X86_64_Branch;
488      }
489    } else if (Target.getSymB()) { // A - B + constant
490      const MCSymbol *A = &Target.getSymA()->getSymbol();
491      MCSymbolData &A_SD = Asm.getSymbolData(*A);
492      const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
493
494      const MCSymbol *B = &Target.getSymB()->getSymbol();
495      MCSymbolData &B_SD = Asm.getSymbolData(*B);
496      const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
497
498      // Neither symbol can be modified.
499      if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
500          Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
501        report_fatal_error("unsupported relocation of modified symbol");
502
503      // We don't support PCrel relocations of differences. Darwin 'as' doesn't
504      // implement most of these correctly.
505      if (IsPCRel)
506        report_fatal_error("unsupported pc-relative relocation of difference");
507
508      // The support for the situation where one or both of the symbols would
509      // require a local relocation is handled just like if the symbols were
510      // external.  This is certainly used in the case of debug sections where
511      // the section has only temporary symbols and thus the symbols don't have
512      // base symbols.  This is encoded using the section ordinal and
513      // non-extern relocation entries.
514
515      // Darwin 'as' doesn't emit correct relocations for this (it ends up with
516      // a single SIGNED relocation); reject it for now.  Except the case where
517      // both symbols don't have a base, equal but both NULL.
518      if (A_Base == B_Base && A_Base)
519        report_fatal_error("unsupported relocation with identical base");
520
521      Value += getSymbolAddress(&A_SD, Layout) -
522        (A_Base == NULL ? 0 : getSymbolAddress(A_Base, Layout));
523      Value -= getSymbolAddress(&B_SD, Layout) -
524        (B_Base == NULL ? 0 : getSymbolAddress(B_Base, Layout));
525
526      if (A_Base) {
527        Index = A_Base->getIndex();
528        IsExtern = 1;
529      }
530      else {
531        Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
532        IsExtern = 0;
533      }
534      Type = macho::RIT_X86_64_Unsigned;
535
536      macho::RelocationEntry MRE;
537      MRE.Word0 = FixupOffset;
538      MRE.Word1 = ((Index     <<  0) |
539                   (IsPCRel   << 24) |
540                   (Log2Size  << 25) |
541                   (IsExtern  << 27) |
542                   (Type      << 28));
543      Relocations[Fragment->getParent()].push_back(MRE);
544
545      if (B_Base) {
546        Index = B_Base->getIndex();
547        IsExtern = 1;
548      }
549      else {
550        Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
551        IsExtern = 0;
552      }
553      Type = macho::RIT_X86_64_Subtractor;
554    } else {
555      const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
556      MCSymbolData &SD = Asm.getSymbolData(*Symbol);
557      const MCSymbolData *Base = Asm.getAtom(&SD);
558
559      // Relocations inside debug sections always use local relocations when
560      // possible. This seems to be done because the debugger doesn't fully
561      // understand x86_64 relocation entries, and expects to find values that
562      // have already been fixed up.
563      if (Symbol->isInSection()) {
564        const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
565          Fragment->getParent()->getSection());
566        if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG))
567          Base = 0;
568      }
569
570      // x86_64 almost always uses external relocations, except when there is no
571      // symbol to use as a base address (a local symbol with no preceding
572      // non-local symbol).
573      if (Base) {
574        Index = Base->getIndex();
575        IsExtern = 1;
576
577        // Add the local offset, if needed.
578        if (Base != &SD)
579          Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
580      } else if (Symbol->isInSection() && !Symbol->isVariable()) {
581        // The index is the section ordinal (1-based).
582        Index = SD.getFragment()->getParent()->getOrdinal() + 1;
583        IsExtern = 0;
584        Value += getSymbolAddress(&SD, Layout);
585
586        if (IsPCRel)
587          Value -= FixupAddress + (1 << Log2Size);
588      } else if (Symbol->isVariable()) {
589        const MCExpr *Value = Symbol->getVariableValue();
590        int64_t Res;
591        bool isAbs = Value->EvaluateAsAbsolute(Res, Layout, SectionAddress);
592        if (isAbs) {
593          FixedValue = Res;
594          return;
595        } else {
596          report_fatal_error("unsupported relocation of variable '" +
597                             Symbol->getName() + "'");
598        }
599      } else {
600        report_fatal_error("unsupported relocation of undefined symbol '" +
601                           Symbol->getName() + "'");
602      }
603
604      MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
605      if (IsPCRel) {
606        if (IsRIPRel) {
607          if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
608            // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
609            // rewrite the movq to an leaq at link time if the symbol ends up in
610            // the same linkage unit.
611            if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
612              Type = macho::RIT_X86_64_GOTLoad;
613            else
614              Type = macho::RIT_X86_64_GOT;
615          }  else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
616            Type = macho::RIT_X86_64_TLV;
617          }  else if (Modifier != MCSymbolRefExpr::VK_None) {
618            report_fatal_error("unsupported symbol modifier in relocation");
619          } else {
620            Type = macho::RIT_X86_64_Signed;
621
622            // The Darwin x86_64 relocation format has a problem where it cannot
623            // encode an address (L<foo> + <constant>) which is outside the atom
624            // containing L<foo>. Generally, this shouldn't occur but it does
625            // happen when we have a RIPrel instruction with data following the
626            // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel
627            // adjustment Darwin x86_64 uses, the offset is still negative and
628            // the linker has no way to recognize this.
629            //
630            // To work around this, Darwin uses several special relocation types
631            // to indicate the offsets. However, the specification or
632            // implementation of these seems to also be incomplete; they should
633            // adjust the addend as well based on the actual encoded instruction
634            // (the additional bias), but instead appear to just look at the
635            // final offset.
636            switch (-(Target.getConstant() + (1LL << Log2Size))) {
637            case 1: Type = macho::RIT_X86_64_Signed1; break;
638            case 2: Type = macho::RIT_X86_64_Signed2; break;
639            case 4: Type = macho::RIT_X86_64_Signed4; break;
640            }
641          }
642        } else {
643          if (Modifier != MCSymbolRefExpr::VK_None)
644            report_fatal_error("unsupported symbol modifier in branch "
645                              "relocation");
646
647          Type = macho::RIT_X86_64_Branch;
648        }
649      } else {
650        if (Modifier == MCSymbolRefExpr::VK_GOT) {
651          Type = macho::RIT_X86_64_GOT;
652        } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
653          // GOTPCREL is allowed as a modifier on non-PCrel instructions, in
654          // which case all we do is set the PCrel bit in the relocation entry;
655          // this is used with exception handling, for example. The source is
656          // required to include any necessary offset directly.
657          Type = macho::RIT_X86_64_GOT;
658          IsPCRel = 1;
659        } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
660          report_fatal_error("TLVP symbol modifier should have been rip-rel");
661        } else if (Modifier != MCSymbolRefExpr::VK_None)
662          report_fatal_error("unsupported symbol modifier in relocation");
663        else
664          Type = macho::RIT_X86_64_Unsigned;
665      }
666    }
667
668    // x86_64 always writes custom values into the fixups.
669    FixedValue = Value;
670
671    // struct relocation_info (8 bytes)
672    macho::RelocationEntry MRE;
673    MRE.Word0 = FixupOffset;
674    MRE.Word1 = ((Index     <<  0) |
675                 (IsPCRel   << 24) |
676                 (Log2Size  << 25) |
677                 (IsExtern  << 27) |
678                 (Type      << 28));
679    Relocations[Fragment->getParent()].push_back(MRE);
680  }
681
682  void RecordScatteredRelocation(const MCAssembler &Asm,
683                                 const MCAsmLayout &Layout,
684                                 const MCFragment *Fragment,
685                                 const MCFixup &Fixup, MCValue Target,
686                                 unsigned Log2Size,
687                                 uint64_t &FixedValue) {
688    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
689    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
690    unsigned Type = macho::RIT_Vanilla;
691
692    // See <reloc.h>.
693    const MCSymbol *A = &Target.getSymA()->getSymbol();
694    MCSymbolData *A_SD = &Asm.getSymbolData(*A);
695
696    if (!A_SD->getFragment())
697      report_fatal_error("symbol '" + A->getName() +
698                        "' can not be undefined in a subtraction expression");
699
700    uint32_t Value = getSymbolAddress(A_SD, Layout);
701    uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
702    FixedValue += SecAddr;
703    uint32_t Value2 = 0;
704
705    if (const MCSymbolRefExpr *B = Target.getSymB()) {
706      MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
707
708      if (!B_SD->getFragment())
709        report_fatal_error("symbol '" + B->getSymbol().getName() +
710                          "' can not be undefined in a subtraction expression");
711
712      // Select the appropriate difference relocation type.
713      //
714      // Note that there is no longer any semantic difference between these two
715      // relocation types from the linkers point of view, this is done solely
716      // for pedantic compatibility with 'as'.
717      Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference :
718        (unsigned)macho::RIT_Generic_LocalDifference;
719      Value2 = getSymbolAddress(B_SD, Layout);
720      FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
721    }
722
723    // Relocations are written out in reverse order, so the PAIR comes first.
724    if (Type == macho::RIT_Difference ||
725        Type == macho::RIT_Generic_LocalDifference) {
726      macho::RelocationEntry MRE;
727      MRE.Word0 = ((0         <<  0) |
728                   (macho::RIT_Pair  << 24) |
729                   (Log2Size  << 28) |
730                   (IsPCRel   << 30) |
731                   macho::RF_Scattered);
732      MRE.Word1 = Value2;
733      Relocations[Fragment->getParent()].push_back(MRE);
734    }
735
736    macho::RelocationEntry MRE;
737    MRE.Word0 = ((FixupOffset <<  0) |
738                 (Type        << 24) |
739                 (Log2Size    << 28) |
740                 (IsPCRel     << 30) |
741                 macho::RF_Scattered);
742    MRE.Word1 = Value;
743    Relocations[Fragment->getParent()].push_back(MRE);
744  }
745
746  void RecordARMScatteredRelocation(const MCAssembler &Asm,
747                                    const MCAsmLayout &Layout,
748                                    const MCFragment *Fragment,
749                                    const MCFixup &Fixup, MCValue Target,
750                                    unsigned Log2Size,
751                                    uint64_t &FixedValue) {
752    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
753    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
754    unsigned Type = macho::RIT_Vanilla;
755
756    // See <reloc.h>.
757    const MCSymbol *A = &Target.getSymA()->getSymbol();
758    MCSymbolData *A_SD = &Asm.getSymbolData(*A);
759
760    if (!A_SD->getFragment())
761      report_fatal_error("symbol '" + A->getName() +
762                        "' can not be undefined in a subtraction expression");
763
764    uint32_t Value = getSymbolAddress(A_SD, Layout);
765    uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
766    FixedValue += SecAddr;
767    uint32_t Value2 = 0;
768
769    if (const MCSymbolRefExpr *B = Target.getSymB()) {
770      MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
771
772      if (!B_SD->getFragment())
773        report_fatal_error("symbol '" + B->getSymbol().getName() +
774                          "' can not be undefined in a subtraction expression");
775
776      // Select the appropriate difference relocation type.
777      Type = macho::RIT_Difference;
778      Value2 = getSymbolAddress(B_SD, Layout);
779      FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
780    }
781
782    // Relocations are written out in reverse order, so the PAIR comes first.
783    if (Type == macho::RIT_Difference ||
784        Type == macho::RIT_Generic_LocalDifference) {
785      macho::RelocationEntry MRE;
786      MRE.Word0 = ((0         <<  0) |
787                   (macho::RIT_Pair  << 24) |
788                   (Log2Size  << 28) |
789                   (IsPCRel   << 30) |
790                   macho::RF_Scattered);
791      MRE.Word1 = Value2;
792      Relocations[Fragment->getParent()].push_back(MRE);
793    }
794
795    macho::RelocationEntry MRE;
796    MRE.Word0 = ((FixupOffset <<  0) |
797                 (Type        << 24) |
798                 (Log2Size    << 28) |
799                 (IsPCRel     << 30) |
800                 macho::RF_Scattered);
801    MRE.Word1 = Value;
802    Relocations[Fragment->getParent()].push_back(MRE);
803  }
804
805  void RecordARMMovwMovtRelocation(const MCAssembler &Asm,
806                                   const MCAsmLayout &Layout,
807                                   const MCFragment *Fragment,
808                                   const MCFixup &Fixup, MCValue Target,
809                                   uint64_t &FixedValue) {
810    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
811    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
812    unsigned Type = macho::RIT_ARM_Half;
813
814    // See <reloc.h>.
815    const MCSymbol *A = &Target.getSymA()->getSymbol();
816    MCSymbolData *A_SD = &Asm.getSymbolData(*A);
817
818    if (!A_SD->getFragment())
819      report_fatal_error("symbol '" + A->getName() +
820                        "' can not be undefined in a subtraction expression");
821
822    uint32_t Value = getSymbolAddress(A_SD, Layout);
823    uint32_t Value2 = 0;
824    uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
825    FixedValue += SecAddr;
826
827    if (const MCSymbolRefExpr *B = Target.getSymB()) {
828      MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
829
830      if (!B_SD->getFragment())
831        report_fatal_error("symbol '" + B->getSymbol().getName() +
832                          "' can not be undefined in a subtraction expression");
833
834      // Select the appropriate difference relocation type.
835      Type = macho::RIT_ARM_HalfDifference;
836      Value2 = getSymbolAddress(B_SD, Layout);
837      FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
838    }
839
840    // Relocations are written out in reverse order, so the PAIR comes first.
841    // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field:
842    //
843    // For these two r_type relocations they always have a pair following them
844    // and the r_length bits are used differently.  The encoding of the
845    // r_length is as follows:
846    // low bit of r_length:
847    //  0 - :lower16: for movw instructions
848    //  1 - :upper16: for movt instructions
849    // high bit of r_length:
850    //  0 - arm instructions
851    //  1 - thumb instructions
852    // the other half of the relocated expression is in the following pair
853    // relocation entry in the the low 16 bits of r_address field.
854    unsigned ThumbBit = 0;
855    unsigned MovtBit = 0;
856    switch ((unsigned)Fixup.getKind()) {
857    default: break;
858    case ARM::fixup_arm_movt_hi16:
859    case ARM::fixup_arm_movt_hi16_pcrel:
860      MovtBit = 1;
861      break;
862    case ARM::fixup_t2_movt_hi16:
863    case ARM::fixup_t2_movt_hi16_pcrel:
864      MovtBit = 1;
865      // Fallthrough
866    case ARM::fixup_t2_movw_lo16:
867    case ARM::fixup_t2_movw_lo16_pcrel:
868      ThumbBit = 1;
869      break;
870    }
871
872
873    if (Type == macho::RIT_ARM_HalfDifference) {
874      uint32_t OtherHalf = MovtBit
875        ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
876
877      macho::RelocationEntry MRE;
878      MRE.Word0 = ((OtherHalf       <<  0) |
879                   (macho::RIT_Pair << 24) |
880                   (MovtBit         << 28) |
881                   (ThumbBit        << 29) |
882                   (IsPCRel         << 30) |
883                   macho::RF_Scattered);
884      MRE.Word1 = Value2;
885      Relocations[Fragment->getParent()].push_back(MRE);
886    }
887
888    macho::RelocationEntry MRE;
889    MRE.Word0 = ((FixupOffset <<  0) |
890                 (Type        << 24) |
891                 (MovtBit     << 28) |
892                 (ThumbBit    << 29) |
893                 (IsPCRel     << 30) |
894                 macho::RF_Scattered);
895    MRE.Word1 = Value;
896    Relocations[Fragment->getParent()].push_back(MRE);
897  }
898
899  void RecordTLVPRelocation(const MCAssembler &Asm,
900                            const MCAsmLayout &Layout,
901                            const MCFragment *Fragment,
902                            const MCFixup &Fixup, MCValue Target,
903                            uint64_t &FixedValue) {
904    assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP &&
905           !is64Bit() &&
906           "Should only be called with a 32-bit TLVP relocation!");
907
908    unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
909    uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
910    unsigned IsPCRel = 0;
911
912    // Get the symbol data.
913    MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
914    unsigned Index = SD_A->getIndex();
915
916    // We're only going to have a second symbol in pic mode and it'll be a
917    // subtraction from the picbase. For 32-bit pic the addend is the difference
918    // between the picbase and the next address.  For 32-bit static the addend
919    // is zero.
920    if (Target.getSymB()) {
921      // If this is a subtraction then we're pcrel.
922      uint32_t FixupAddress =
923        getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
924      MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol());
925      IsPCRel = 1;
926      FixedValue = (FixupAddress - getSymbolAddress(SD_B, Layout) +
927                    Target.getConstant());
928      FixedValue += 1ULL << Log2Size;
929    } else {
930      FixedValue = 0;
931    }
932
933    // struct relocation_info (8 bytes)
934    macho::RelocationEntry MRE;
935    MRE.Word0 = Value;
936    MRE.Word1 = ((Index                  <<  0) |
937                 (IsPCRel                << 24) |
938                 (Log2Size               << 25) |
939                 (1                      << 27) | // Extern
940                 (macho::RIT_Generic_TLV << 28)); // Type
941    Relocations[Fragment->getParent()].push_back(MRE);
942  }
943
944  static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
945                                       unsigned &Log2Size) {
946    RelocType = unsigned(macho::RIT_Vanilla);
947    Log2Size = ~0U;
948
949    switch (Kind) {
950    default:
951      return false;
952
953    case FK_Data_1:
954      Log2Size = llvm::Log2_32(1);
955      return true;
956    case FK_Data_2:
957      Log2Size = llvm::Log2_32(2);
958      return true;
959    case FK_Data_4:
960      Log2Size = llvm::Log2_32(4);
961      return true;
962    case FK_Data_8:
963      Log2Size = llvm::Log2_32(8);
964      return true;
965
966      // Handle 24-bit branch kinds.
967    case ARM::fixup_arm_ldst_pcrel_12:
968    case ARM::fixup_arm_pcrel_10:
969    case ARM::fixup_arm_adr_pcrel_12:
970    case ARM::fixup_arm_condbranch:
971    case ARM::fixup_arm_uncondbranch:
972      RelocType = unsigned(macho::RIT_ARM_Branch24Bit);
973      // Report as 'long', even though that is not quite accurate.
974      Log2Size = llvm::Log2_32(4);
975      return true;
976
977      // Handle Thumb branches.
978    case ARM::fixup_arm_thumb_br:
979      RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
980      Log2Size = llvm::Log2_32(2);
981      return true;
982
983    case ARM::fixup_arm_thumb_bl:
984    case ARM::fixup_arm_thumb_blx:
985      RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
986      Log2Size = llvm::Log2_32(4);
987      return true;
988
989    case ARM::fixup_arm_movt_hi16:
990    case ARM::fixup_arm_movt_hi16_pcrel:
991    case ARM::fixup_t2_movt_hi16:
992    case ARM::fixup_t2_movt_hi16_pcrel:
993      RelocType = unsigned(macho::RIT_ARM_HalfDifference);
994      // Report as 'long', even though that is not quite accurate.
995      Log2Size = llvm::Log2_32(4);
996      return true;
997
998    case ARM::fixup_arm_movw_lo16:
999    case ARM::fixup_arm_movw_lo16_pcrel:
1000    case ARM::fixup_t2_movw_lo16:
1001    case ARM::fixup_t2_movw_lo16_pcrel:
1002      RelocType = unsigned(macho::RIT_ARM_Half);
1003      // Report as 'long', even though that is not quite accurate.
1004      Log2Size = llvm::Log2_32(4);
1005      return true;
1006    }
1007  }
1008  void RecordARMRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
1009                           const MCFragment *Fragment, const MCFixup &Fixup,
1010                           MCValue Target, uint64_t &FixedValue) {
1011    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
1012    unsigned Log2Size;
1013    unsigned RelocType = macho::RIT_Vanilla;
1014    if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) {
1015      report_fatal_error("unknown ARM fixup kind!");
1016      return;
1017    }
1018
1019    // If this is a difference or a defined symbol plus an offset, then we need
1020    // a scattered relocation entry.  Differences always require scattered
1021    // relocations.
1022    if (Target.getSymB()) {
1023      if (RelocType == macho::RIT_ARM_Half ||
1024          RelocType == macho::RIT_ARM_HalfDifference)
1025        return RecordARMMovwMovtRelocation(Asm, Layout, Fragment, Fixup,
1026                                           Target, FixedValue);
1027      return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup,
1028                                          Target, Log2Size, FixedValue);
1029    }
1030
1031    // Get the symbol data, if any.
1032    MCSymbolData *SD = 0;
1033    if (Target.getSymA())
1034      SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
1035
1036    // FIXME: For other platforms, we need to use scattered relocations for
1037    // internal relocations with offsets.  If this is an internal relocation
1038    // with an offset, it also needs a scattered relocation entry.
1039    //
1040    // Is this right for ARM?
1041    uint32_t Offset = Target.getConstant();
1042    if (IsPCRel && RelocType == macho::RIT_Vanilla)
1043      Offset += 1 << Log2Size;
1044    if (Offset && SD && !doesSymbolRequireExternRelocation(SD))
1045      return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup, Target,
1046                                          Log2Size, FixedValue);
1047
1048    // See <reloc.h>.
1049    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
1050    unsigned Index = 0;
1051    unsigned IsExtern = 0;
1052    unsigned Type = 0;
1053
1054    if (Target.isAbsolute()) { // constant
1055      // FIXME!
1056      report_fatal_error("FIXME: relocations to absolute targets "
1057                         "not yet implemented");
1058    } else {
1059      // Resolve constant variables.
1060      if (SD->getSymbol().isVariable()) {
1061        int64_t Res;
1062        if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
1063              Res, Layout, SectionAddress)) {
1064          FixedValue = Res;
1065          return;
1066        }
1067      }
1068
1069      // Check whether we need an external or internal relocation.
1070      if (doesSymbolRequireExternRelocation(SD)) {
1071        IsExtern = 1;
1072        Index = SD->getIndex();
1073        // For external relocations, make sure to offset the fixup value to
1074        // compensate for the addend of the symbol address, if it was
1075        // undefined. This occurs with weak definitions, for example.
1076        if (!SD->Symbol->isUndefined())
1077          FixedValue -= Layout.getSymbolOffset(SD);
1078      } else {
1079        // The index is the section ordinal (1-based).
1080        const MCSectionData &SymSD = Asm.getSectionData(
1081          SD->getSymbol().getSection());
1082        Index = SymSD.getOrdinal() + 1;
1083        FixedValue += getSectionAddress(&SymSD);
1084      }
1085      if (IsPCRel)
1086        FixedValue -= getSectionAddress(Fragment->getParent());
1087
1088      // The type is determined by the fixup kind.
1089      Type = RelocType;
1090    }
1091
1092    // struct relocation_info (8 bytes)
1093    macho::RelocationEntry MRE;
1094    MRE.Word0 = FixupOffset;
1095    MRE.Word1 = ((Index     <<  0) |
1096                 (IsPCRel   << 24) |
1097                 (Log2Size  << 25) |
1098                 (IsExtern  << 27) |
1099                 (Type      << 28));
1100    Relocations[Fragment->getParent()].push_back(MRE);
1101  }
1102
1103  void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
1104                        const MCFragment *Fragment, const MCFixup &Fixup,
1105                        MCValue Target, uint64_t &FixedValue) {
1106    // FIXME: These needs to be factored into the target Mach-O writer.
1107    if (isARM()) {
1108      RecordARMRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
1109      return;
1110    }
1111    if (is64Bit()) {
1112      RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
1113      return;
1114    }
1115
1116    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
1117    unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
1118
1119    // If this is a 32-bit TLVP reloc it's handled a bit differently.
1120    if (Target.getSymA() &&
1121        Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
1122      RecordTLVPRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
1123      return;
1124    }
1125
1126    // If this is a difference or a defined symbol plus an offset, then we need
1127    // a scattered relocation entry.
1128    // Differences always require scattered relocations.
1129    if (Target.getSymB())
1130        return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
1131                                         Target, Log2Size, FixedValue);
1132
1133    // Get the symbol data, if any.
1134    MCSymbolData *SD = 0;
1135    if (Target.getSymA())
1136      SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
1137
1138    // If this is an internal relocation with an offset, it also needs a
1139    // scattered relocation entry.
1140    uint32_t Offset = Target.getConstant();
1141    if (IsPCRel)
1142      Offset += 1 << Log2Size;
1143    if (Offset && SD && !doesSymbolRequireExternRelocation(SD))
1144      return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
1145                                       Target, Log2Size, FixedValue);
1146
1147    // See <reloc.h>.
1148    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
1149    unsigned Index = 0;
1150    unsigned IsExtern = 0;
1151    unsigned Type = 0;
1152
1153    if (Target.isAbsolute()) { // constant
1154      // SymbolNum of 0 indicates the absolute section.
1155      //
1156      // FIXME: Currently, these are never generated (see code below). I cannot
1157      // find a case where they are actually emitted.
1158      Type = macho::RIT_Vanilla;
1159    } else {
1160      // Resolve constant variables.
1161      if (SD->getSymbol().isVariable()) {
1162        int64_t Res;
1163        if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
1164              Res, Layout, SectionAddress)) {
1165          FixedValue = Res;
1166          return;
1167        }
1168      }
1169
1170      // Check whether we need an external or internal relocation.
1171      if (doesSymbolRequireExternRelocation(SD)) {
1172        IsExtern = 1;
1173        Index = SD->getIndex();
1174        // For external relocations, make sure to offset the fixup value to
1175        // compensate for the addend of the symbol address, if it was
1176        // undefined. This occurs with weak definitions, for example.
1177        if (!SD->Symbol->isUndefined())
1178          FixedValue -= Layout.getSymbolOffset(SD);
1179      } else {
1180        // The index is the section ordinal (1-based).
1181        const MCSectionData &SymSD = Asm.getSectionData(
1182          SD->getSymbol().getSection());
1183        Index = SymSD.getOrdinal() + 1;
1184        FixedValue += getSectionAddress(&SymSD);
1185      }
1186      if (IsPCRel)
1187        FixedValue -= getSectionAddress(Fragment->getParent());
1188
1189      Type = macho::RIT_Vanilla;
1190    }
1191
1192    // struct relocation_info (8 bytes)
1193    macho::RelocationEntry MRE;
1194    MRE.Word0 = FixupOffset;
1195    MRE.Word1 = ((Index     <<  0) |
1196                 (IsPCRel   << 24) |
1197                 (Log2Size  << 25) |
1198                 (IsExtern  << 27) |
1199                 (Type      << 28));
1200    Relocations[Fragment->getParent()].push_back(MRE);
1201  }
1202
1203  void BindIndirectSymbols(MCAssembler &Asm) {
1204    // This is the point where 'as' creates actual symbols for indirect symbols
1205    // (in the following two passes). It would be easier for us to do this
1206    // sooner when we see the attribute, but that makes getting the order in the
1207    // symbol table much more complicated than it is worth.
1208    //
1209    // FIXME: Revisit this when the dust settles.
1210
1211    // Bind non lazy symbol pointers first.
1212    unsigned IndirectIndex = 0;
1213    for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
1214           ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
1215      const MCSectionMachO &Section =
1216        cast<MCSectionMachO>(it->SectionData->getSection());
1217
1218      if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS)
1219        continue;
1220
1221      // Initialize the section indirect symbol base, if necessary.
1222      if (!IndirectSymBase.count(it->SectionData))
1223        IndirectSymBase[it->SectionData] = IndirectIndex;
1224
1225      Asm.getOrCreateSymbolData(*it->Symbol);
1226    }
1227
1228    // Then lazy symbol pointers and symbol stubs.
1229    IndirectIndex = 0;
1230    for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
1231           ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
1232      const MCSectionMachO &Section =
1233        cast<MCSectionMachO>(it->SectionData->getSection());
1234
1235      if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
1236          Section.getType() != MCSectionMachO::S_SYMBOL_STUBS)
1237        continue;
1238
1239      // Initialize the section indirect symbol base, if necessary.
1240      if (!IndirectSymBase.count(it->SectionData))
1241        IndirectSymBase[it->SectionData] = IndirectIndex;
1242
1243      // Set the symbol type to undefined lazy, but only on construction.
1244      //
1245      // FIXME: Do not hardcode.
1246      bool Created;
1247      MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created);
1248      if (Created)
1249        Entry.setFlags(Entry.getFlags() | 0x0001);
1250    }
1251  }
1252
1253  /// ComputeSymbolTable - Compute the symbol table data
1254  ///
1255  /// \param StringTable [out] - The string table data.
1256  /// \param StringIndexMap [out] - Map from symbol names to offsets in the
1257  /// string table.
1258  void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
1259                          std::vector<MachSymbolData> &LocalSymbolData,
1260                          std::vector<MachSymbolData> &ExternalSymbolData,
1261                          std::vector<MachSymbolData> &UndefinedSymbolData) {
1262    // Build section lookup table.
1263    DenseMap<const MCSection*, uint8_t> SectionIndexMap;
1264    unsigned Index = 1;
1265    for (MCAssembler::iterator it = Asm.begin(),
1266           ie = Asm.end(); it != ie; ++it, ++Index)
1267      SectionIndexMap[&it->getSection()] = Index;
1268    assert(Index <= 256 && "Too many sections!");
1269
1270    // Index 0 is always the empty string.
1271    StringMap<uint64_t> StringIndexMap;
1272    StringTable += '\x00';
1273
1274    // Build the symbol arrays and the string table, but only for non-local
1275    // symbols.
1276    //
1277    // The particular order that we collect the symbols and create the string
1278    // table, then sort the symbols is chosen to match 'as'. Even though it
1279    // doesn't matter for correctness, this is important for letting us diff .o
1280    // files.
1281    for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
1282           ie = Asm.symbol_end(); it != ie; ++it) {
1283      const MCSymbol &Symbol = it->getSymbol();
1284
1285      // Ignore non-linker visible symbols.
1286      if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
1287        continue;
1288
1289      if (!it->isExternal() && !Symbol.isUndefined())
1290        continue;
1291
1292      uint64_t &Entry = StringIndexMap[Symbol.getName()];
1293      if (!Entry) {
1294        Entry = StringTable.size();
1295        StringTable += Symbol.getName();
1296        StringTable += '\x00';
1297      }
1298
1299      MachSymbolData MSD;
1300      MSD.SymbolData = it;
1301      MSD.StringIndex = Entry;
1302
1303      if (Symbol.isUndefined()) {
1304        MSD.SectionIndex = 0;
1305        UndefinedSymbolData.push_back(MSD);
1306      } else if (Symbol.isAbsolute()) {
1307        MSD.SectionIndex = 0;
1308        ExternalSymbolData.push_back(MSD);
1309      } else {
1310        MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
1311        assert(MSD.SectionIndex && "Invalid section index!");
1312        ExternalSymbolData.push_back(MSD);
1313      }
1314    }
1315
1316    // Now add the data for local symbols.
1317    for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
1318           ie = Asm.symbol_end(); it != ie; ++it) {
1319      const MCSymbol &Symbol = it->getSymbol();
1320
1321      // Ignore non-linker visible symbols.
1322      if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
1323        continue;
1324
1325      if (it->isExternal() || Symbol.isUndefined())
1326        continue;
1327
1328      uint64_t &Entry = StringIndexMap[Symbol.getName()];
1329      if (!Entry) {
1330        Entry = StringTable.size();
1331        StringTable += Symbol.getName();
1332        StringTable += '\x00';
1333      }
1334
1335      MachSymbolData MSD;
1336      MSD.SymbolData = it;
1337      MSD.StringIndex = Entry;
1338
1339      if (Symbol.isAbsolute()) {
1340        MSD.SectionIndex = 0;
1341        LocalSymbolData.push_back(MSD);
1342      } else {
1343        MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
1344        assert(MSD.SectionIndex && "Invalid section index!");
1345        LocalSymbolData.push_back(MSD);
1346      }
1347    }
1348
1349    // External and undefined symbols are required to be in lexicographic order.
1350    std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
1351    std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
1352
1353    // Set the symbol indices.
1354    Index = 0;
1355    for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
1356      LocalSymbolData[i].SymbolData->setIndex(Index++);
1357    for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
1358      ExternalSymbolData[i].SymbolData->setIndex(Index++);
1359    for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
1360      UndefinedSymbolData[i].SymbolData->setIndex(Index++);
1361
1362    // The string table is padded to a multiple of 4.
1363    while (StringTable.size() % 4)
1364      StringTable += '\x00';
1365  }
1366
1367  void computeSectionAddresses(const MCAssembler &Asm,
1368                               const MCAsmLayout &Layout) {
1369    uint64_t StartAddress = 0;
1370    const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder();
1371    for (int i = 0, n = Order.size(); i != n ; ++i) {
1372      const MCSectionData *SD = Order[i];
1373      StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment());
1374      SectionAddress[SD] = StartAddress;
1375      StartAddress += Layout.getSectionAddressSize(SD);
1376      // Explicitly pad the section to match the alignment requirements of the
1377      // following one. This is for 'gas' compatibility, it shouldn't
1378      /// strictly be necessary.
1379      StartAddress += getPaddingSize(SD, Layout);
1380    }
1381  }
1382
1383  void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) {
1384    computeSectionAddresses(Asm, Layout);
1385
1386    // Create symbol data for any indirect symbols.
1387    BindIndirectSymbols(Asm);
1388
1389    // Compute symbol table information and bind symbol indices.
1390    ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData,
1391                       UndefinedSymbolData);
1392  }
1393
1394  virtual bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
1395                                                      const MCSymbolData &DataA,
1396                                                      const MCFragment &FB,
1397                                                      bool InSet,
1398                                                      bool IsPCRel) const {
1399    if (InSet)
1400      return true;
1401
1402    // The effective address is
1403    //     addr(atom(A)) + offset(A)
1404    //   - addr(atom(B)) - offset(B)
1405    // and the offsets are not relocatable, so the fixup is fully resolved when
1406    //  addr(atom(A)) - addr(atom(B)) == 0.
1407    const MCSymbolData *A_Base = 0, *B_Base = 0;
1408
1409    const MCSymbol &SA = DataA.getSymbol().AliasedSymbol();
1410    const MCSection &SecA = SA.getSection();
1411    const MCSection &SecB = FB.getParent()->getSection();
1412
1413    if (IsPCRel) {
1414      // The simple (Darwin, except on x86_64) way of dealing with this was to
1415      // assume that any reference to a temporary symbol *must* be a temporary
1416      // symbol in the same atom, unless the sections differ. Therefore, any
1417      // PCrel relocation to a temporary symbol (in the same section) is fully
1418      // resolved. This also works in conjunction with absolutized .set, which
1419      // requires the compiler to use .set to absolutize the differences between
1420      // symbols which the compiler knows to be assembly time constants, so we
1421      // don't need to worry about considering symbol differences fully
1422      // resolved.
1423
1424      if (!Asm.getBackend().hasReliableSymbolDifference()) {
1425        if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB)
1426          return false;
1427        return true;
1428      }
1429    } else {
1430      if (!TargetObjectWriter->useAggressiveSymbolFolding())
1431        return false;
1432    }
1433
1434    const MCFragment &FA = *Asm.getSymbolData(SA).getFragment();
1435
1436    A_Base = FA.getAtom();
1437    if (!A_Base)
1438      return false;
1439
1440    B_Base = FB.getAtom();
1441    if (!B_Base)
1442      return false;
1443
1444    // If the atoms are the same, they are guaranteed to have the same address.
1445    if (A_Base == B_Base)
1446      return true;
1447
1448    // Otherwise, we can't prove this is fully resolved.
1449    return false;
1450  }
1451
1452  void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) {
1453    unsigned NumSections = Asm.size();
1454
1455    // The section data starts after the header, the segment load command (and
1456    // section headers) and the symbol table.
1457    unsigned NumLoadCommands = 1;
1458    uint64_t LoadCommandsSize = is64Bit() ?
1459      macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size :
1460      macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size;
1461
1462    // Add the symbol table load command sizes, if used.
1463    unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
1464      UndefinedSymbolData.size();
1465    if (NumSymbols) {
1466      NumLoadCommands += 2;
1467      LoadCommandsSize += (macho::SymtabLoadCommandSize +
1468                           macho::DysymtabLoadCommandSize);
1469    }
1470
1471    // Compute the total size of the section data, as well as its file size and
1472    // vm size.
1473    uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size :
1474                                 macho::Header32Size) + LoadCommandsSize;
1475    uint64_t SectionDataSize = 0;
1476    uint64_t SectionDataFileSize = 0;
1477    uint64_t VMSize = 0;
1478    for (MCAssembler::const_iterator it = Asm.begin(),
1479           ie = Asm.end(); it != ie; ++it) {
1480      const MCSectionData &SD = *it;
1481      uint64_t Address = getSectionAddress(&SD);
1482      uint64_t Size = Layout.getSectionAddressSize(&SD);
1483      uint64_t FileSize = Layout.getSectionFileSize(&SD);
1484      FileSize += getPaddingSize(&SD, Layout);
1485
1486      VMSize = std::max(VMSize, Address + Size);
1487
1488      if (SD.getSection().isVirtualSection())
1489        continue;
1490
1491      SectionDataSize = std::max(SectionDataSize, Address + Size);
1492      SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
1493    }
1494
1495    // The section data is padded to 4 bytes.
1496    //
1497    // FIXME: Is this machine dependent?
1498    unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
1499    SectionDataFileSize += SectionDataPadding;
1500
1501    // Write the prolog, starting with the header and load command...
1502    WriteHeader(NumLoadCommands, LoadCommandsSize,
1503                Asm.getSubsectionsViaSymbols());
1504    WriteSegmentLoadCommand(NumSections, VMSize,
1505                            SectionDataStart, SectionDataSize);
1506
1507    // ... and then the section headers.
1508    uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
1509    for (MCAssembler::const_iterator it = Asm.begin(),
1510           ie = Asm.end(); it != ie; ++it) {
1511      std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
1512      unsigned NumRelocs = Relocs.size();
1513      uint64_t SectionStart = SectionDataStart + getSectionAddress(it);
1514      WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
1515      RelocTableEnd += NumRelocs * macho::RelocationInfoSize;
1516    }
1517
1518    // Write the symbol table load command, if used.
1519    if (NumSymbols) {
1520      unsigned FirstLocalSymbol = 0;
1521      unsigned NumLocalSymbols = LocalSymbolData.size();
1522      unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
1523      unsigned NumExternalSymbols = ExternalSymbolData.size();
1524      unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
1525      unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
1526      unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
1527      unsigned NumSymTabSymbols =
1528        NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
1529      uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
1530      uint64_t IndirectSymbolOffset = 0;
1531
1532      // If used, the indirect symbols are written after the section data.
1533      if (NumIndirectSymbols)
1534        IndirectSymbolOffset = RelocTableEnd;
1535
1536      // The symbol table is written after the indirect symbol data.
1537      uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize;
1538
1539      // The string table is written after symbol table.
1540      uint64_t StringTableOffset =
1541        SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? macho::Nlist64Size :
1542                                                macho::Nlist32Size);
1543      WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
1544                             StringTableOffset, StringTable.size());
1545
1546      WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
1547                               FirstExternalSymbol, NumExternalSymbols,
1548                               FirstUndefinedSymbol, NumUndefinedSymbols,
1549                               IndirectSymbolOffset, NumIndirectSymbols);
1550    }
1551
1552    // Write the actual section data.
1553    for (MCAssembler::const_iterator it = Asm.begin(),
1554           ie = Asm.end(); it != ie; ++it) {
1555      Asm.WriteSectionData(it, Layout);
1556
1557      uint64_t Pad = getPaddingSize(it, Layout);
1558      for (unsigned int i = 0; i < Pad; ++i)
1559        Write8(0);
1560    }
1561
1562    // Write the extra padding.
1563    WriteZeros(SectionDataPadding);
1564
1565    // Write the relocation entries.
1566    for (MCAssembler::const_iterator it = Asm.begin(),
1567           ie = Asm.end(); it != ie; ++it) {
1568      // Write the section relocation entries, in reverse order to match 'as'
1569      // (approximately, the exact algorithm is more complicated than this).
1570      std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
1571      for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
1572        Write32(Relocs[e - i - 1].Word0);
1573        Write32(Relocs[e - i - 1].Word1);
1574      }
1575    }
1576
1577    // Write the symbol table data, if used.
1578    if (NumSymbols) {
1579      // Write the indirect symbol entries.
1580      for (MCAssembler::const_indirect_symbol_iterator
1581             it = Asm.indirect_symbol_begin(),
1582             ie = Asm.indirect_symbol_end(); it != ie; ++it) {
1583        // Indirect symbols in the non lazy symbol pointer section have some
1584        // special handling.
1585        const MCSectionMachO &Section =
1586          static_cast<const MCSectionMachO&>(it->SectionData->getSection());
1587        if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) {
1588          // If this symbol is defined and internal, mark it as such.
1589          if (it->Symbol->isDefined() &&
1590              !Asm.getSymbolData(*it->Symbol).isExternal()) {
1591            uint32_t Flags = macho::ISF_Local;
1592            if (it->Symbol->isAbsolute())
1593              Flags |= macho::ISF_Absolute;
1594            Write32(Flags);
1595            continue;
1596          }
1597        }
1598
1599        Write32(Asm.getSymbolData(*it->Symbol).getIndex());
1600      }
1601
1602      // FIXME: Check that offsets match computed ones.
1603
1604      // Write the symbol table entries.
1605      for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
1606        WriteNlist(LocalSymbolData[i], Layout);
1607      for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
1608        WriteNlist(ExternalSymbolData[i], Layout);
1609      for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
1610        WriteNlist(UndefinedSymbolData[i], Layout);
1611
1612      // Write the string table.
1613      OS << StringTable.str();
1614    }
1615  }
1616};
1617
1618}
1619
1620MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW,
1621                                             raw_ostream &OS,
1622                                             bool IsLittleEndian) {
1623  return new MachObjectWriter(MOTW, OS, IsLittleEndian);
1624}
1625