1//=--------- MachOLinkGraphBuilder.cpp - MachO LinkGraph builder ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// Generic MachO LinkGraph building code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "MachOLinkGraphBuilder.h"
14
15#define DEBUG_TYPE "jitlink"
16
// Name given to the graph section that getCommonSection() creates on demand.
// Both the characters and the pointer are const: nothing in this file
// reassigns it.
static const char *const CommonSectionName = "__common";
18
19namespace llvm {
20namespace jitlink {
21
22MachOLinkGraphBuilder::~MachOLinkGraphBuilder() {}
23
24Expected<std::unique_ptr<LinkGraph>> MachOLinkGraphBuilder::buildGraph() {
25
26  // Sanity check: we only operate on relocatable objects.
27  if (!Obj.isRelocatableObject())
28    return make_error<JITLinkError>("Object is not a relocatable MachO");
29
30  if (auto Err = createNormalizedSections())
31    return std::move(Err);
32
33  if (auto Err = createNormalizedSymbols())
34    return std::move(Err);
35
36  if (auto Err = graphifyRegularSymbols())
37    return std::move(Err);
38
39  if (auto Err = graphifySectionsWithCustomParsers())
40    return std::move(Err);
41
42  if (auto Err = addRelocations())
43    return std::move(Err);
44
45  return std::move(G);
46}
47
48MachOLinkGraphBuilder::MachOLinkGraphBuilder(
49    const object::MachOObjectFile &Obj, Triple TT,
50    LinkGraph::GetEdgeKindNameFunction GetEdgeKindName)
51    : Obj(Obj),
52      G(std::make_unique<LinkGraph>(
53          std::string(Obj.getFileName()), std::move(TT), getPointerSize(Obj),
54          getEndianness(Obj), std::move(GetEdgeKindName))) {}
55
56void MachOLinkGraphBuilder::addCustomSectionParser(
57    StringRef SectionName, SectionParserFunction Parser) {
58  assert(!CustomSectionParserFunctions.count(SectionName) &&
59         "Custom parser for this section already exists");
60  CustomSectionParserFunctions[SectionName] = std::move(Parser);
61}
62
63Linkage MachOLinkGraphBuilder::getLinkage(uint16_t Desc) {
64  if ((Desc & MachO::N_WEAK_DEF) || (Desc & MachO::N_WEAK_REF))
65    return Linkage::Weak;
66  return Linkage::Strong;
67}
68
69Scope MachOLinkGraphBuilder::getScope(StringRef Name, uint8_t Type) {
70  if (Type & MachO::N_EXT) {
71    if ((Type & MachO::N_PEXT) || Name.startswith("l"))
72      return Scope::Hidden;
73    else
74      return Scope::Default;
75  }
76  return Scope::Local;
77}
78
79bool MachOLinkGraphBuilder::isAltEntry(const NormalizedSymbol &NSym) {
80  return NSym.Desc & MachO::N_ALT_ENTRY;
81}
82
83bool MachOLinkGraphBuilder::isDebugSection(const NormalizedSection &NSec) {
84  return (NSec.Flags & MachO::S_ATTR_DEBUG &&
85          strcmp(NSec.SegName, "__DWARF") == 0);
86}
87
88unsigned
89MachOLinkGraphBuilder::getPointerSize(const object::MachOObjectFile &Obj) {
90  return Obj.is64Bit() ? 8 : 4;
91}
92
93support::endianness
94MachOLinkGraphBuilder::getEndianness(const object::MachOObjectFile &Obj) {
95  return Obj.isLittleEndian() ? support::little : support::big;
96}
97
98Section &MachOLinkGraphBuilder::getCommonSection() {
99  if (!CommonSection) {
100    auto Prot = static_cast<sys::Memory::ProtectionFlags>(
101        sys::Memory::MF_READ | sys::Memory::MF_WRITE);
102    CommonSection = &G->createSection(CommonSectionName, Prot);
103  }
104  return *CommonSection;
105}
106
107Error MachOLinkGraphBuilder::createNormalizedSections() {
108  // Build normalized sections. Verifies that section data is in-range (for
109  // sections with content) and that address ranges are non-overlapping.
110
111  LLVM_DEBUG(dbgs() << "Creating normalized sections...\n");
112
113  for (auto &SecRef : Obj.sections()) {
114    NormalizedSection NSec;
115    uint32_t DataOffset = 0;
116
117    auto SecIndex = Obj.getSectionIndex(SecRef.getRawDataRefImpl());
118
119    if (Obj.is64Bit()) {
120      const MachO::section_64 &Sec64 =
121          Obj.getSection64(SecRef.getRawDataRefImpl());
122
123      memcpy(&NSec.SectName, &Sec64.sectname, 16);
124      NSec.SectName[16] = '\0';
125      memcpy(&NSec.SegName, Sec64.segname, 16);
126      NSec.SegName[16] = '\0';
127
128      NSec.Address = Sec64.addr;
129      NSec.Size = Sec64.size;
130      NSec.Alignment = 1ULL << Sec64.align;
131      NSec.Flags = Sec64.flags;
132      DataOffset = Sec64.offset;
133    } else {
134      const MachO::section &Sec32 = Obj.getSection(SecRef.getRawDataRefImpl());
135
136      memcpy(&NSec.SectName, &Sec32.sectname, 16);
137      NSec.SectName[16] = '\0';
138      memcpy(&NSec.SegName, Sec32.segname, 16);
139      NSec.SegName[16] = '\0';
140
141      NSec.Address = Sec32.addr;
142      NSec.Size = Sec32.size;
143      NSec.Alignment = 1ULL << Sec32.align;
144      NSec.Flags = Sec32.flags;
145      DataOffset = Sec32.offset;
146    }
147
148    LLVM_DEBUG({
149      dbgs() << "  " << NSec.SegName << "," << NSec.SectName << ": "
150             << formatv("{0:x16}", NSec.Address) << " -- "
151             << formatv("{0:x16}", NSec.Address + NSec.Size)
152             << ", align: " << NSec.Alignment << ", index: " << SecIndex
153             << "\n";
154    });
155
156    // Get the section data if any.
157    {
158      unsigned SectionType = NSec.Flags & MachO::SECTION_TYPE;
159      if (SectionType != MachO::S_ZEROFILL &&
160          SectionType != MachO::S_GB_ZEROFILL) {
161
162        if (DataOffset + NSec.Size > Obj.getData().size())
163          return make_error<JITLinkError>(
164              "Section data extends past end of file");
165
166        NSec.Data = Obj.getData().data() + DataOffset;
167      }
168    }
169
170    // Get prot flags.
171    // FIXME: Make sure this test is correct (it's probably missing cases
172    // as-is).
173    sys::Memory::ProtectionFlags Prot;
174    if (NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS)
175      Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
176                                                       sys::Memory::MF_EXEC);
177    else
178      Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
179                                                       sys::Memory::MF_WRITE);
180
181    if (!isDebugSection(NSec)) {
182      auto FullyQualifiedName =
183          G->allocateString(StringRef(NSec.SegName) + "," + NSec.SectName);
184      NSec.GraphSection = &G->createSection(
185          StringRef(FullyQualifiedName.data(), FullyQualifiedName.size()),
186          Prot);
187    } else
188      LLVM_DEBUG({
189        dbgs() << "    " << NSec.SegName << "," << NSec.SectName
190               << " is a debug section: No graph section will be created.\n";
191      });
192
193    IndexToSection.insert(std::make_pair(SecIndex, std::move(NSec)));
194  }
195
196  std::vector<NormalizedSection *> Sections;
197  Sections.reserve(IndexToSection.size());
198  for (auto &KV : IndexToSection)
199    Sections.push_back(&KV.second);
200
201  // If we didn't end up creating any sections then bail out. The code below
202  // assumes that we have at least one section.
203  if (Sections.empty())
204    return Error::success();
205
206  llvm::sort(Sections,
207             [](const NormalizedSection *LHS, const NormalizedSection *RHS) {
208               assert(LHS && RHS && "Null section?");
209               if (LHS->Address != RHS->Address)
210                 return LHS->Address < RHS->Address;
211               return LHS->Size < RHS->Size;
212             });
213
214  for (unsigned I = 0, E = Sections.size() - 1; I != E; ++I) {
215    auto &Cur = *Sections[I];
216    auto &Next = *Sections[I + 1];
217    if (Next.Address < Cur.Address + Cur.Size)
218      return make_error<JITLinkError>(
219          "Address range for section " +
220          formatv("\"{0}/{1}\" [ {2:x16} -- {3:x16} ] ", Cur.SegName,
221                  Cur.SectName, Cur.Address, Cur.Address + Cur.Size) +
222          "overlaps section \"" + Next.SegName + "/" + Next.SectName + "\"" +
223          formatv("\"{0}/{1}\" [ {2:x16} -- {3:x16} ] ", Next.SegName,
224                  Next.SectName, Next.Address, Next.Address + Next.Size));
225  }
226
227  return Error::success();
228}
229
230Error MachOLinkGraphBuilder::createNormalizedSymbols() {
231  LLVM_DEBUG(dbgs() << "Creating normalized symbols...\n");
232
233  for (auto &SymRef : Obj.symbols()) {
234
235    unsigned SymbolIndex = Obj.getSymbolIndex(SymRef.getRawDataRefImpl());
236    uint64_t Value;
237    uint32_t NStrX;
238    uint8_t Type;
239    uint8_t Sect;
240    uint16_t Desc;
241
242    if (Obj.is64Bit()) {
243      const MachO::nlist_64 &NL64 =
244          Obj.getSymbol64TableEntry(SymRef.getRawDataRefImpl());
245      Value = NL64.n_value;
246      NStrX = NL64.n_strx;
247      Type = NL64.n_type;
248      Sect = NL64.n_sect;
249      Desc = NL64.n_desc;
250    } else {
251      const MachO::nlist &NL32 =
252          Obj.getSymbolTableEntry(SymRef.getRawDataRefImpl());
253      Value = NL32.n_value;
254      NStrX = NL32.n_strx;
255      Type = NL32.n_type;
256      Sect = NL32.n_sect;
257      Desc = NL32.n_desc;
258    }
259
260    // Skip stabs.
261    // FIXME: Are there other symbols we should be skipping?
262    if (Type & MachO::N_STAB)
263      continue;
264
265    Optional<StringRef> Name;
266    if (NStrX) {
267      if (auto NameOrErr = SymRef.getName())
268        Name = *NameOrErr;
269      else
270        return NameOrErr.takeError();
271    }
272
273    LLVM_DEBUG({
274      dbgs() << "  ";
275      if (!Name)
276        dbgs() << "<anonymous symbol>";
277      else
278        dbgs() << *Name;
279      dbgs() << ": value = " << formatv("{0:x16}", Value)
280             << ", type = " << formatv("{0:x2}", Type)
281             << ", desc = " << formatv("{0:x4}", Desc) << ", sect = ";
282      if (Sect)
283        dbgs() << static_cast<unsigned>(Sect - 1);
284      else
285        dbgs() << "none";
286      dbgs() << "\n";
287    });
288
289    // If this symbol has a section, sanity check that the addresses line up.
290    if (Sect != 0) {
291      auto NSec = findSectionByIndex(Sect - 1);
292      if (!NSec)
293        return NSec.takeError();
294
295      if (Value < NSec->Address || Value > NSec->Address + NSec->Size)
296        return make_error<JITLinkError>("Symbol address does not fall within "
297                                        "section");
298
299      if (!NSec->GraphSection) {
300        LLVM_DEBUG({
301          dbgs() << "  Skipping: Symbol is in section " << NSec->SegName << "/"
302                 << NSec->SectName
303                 << " which has no associated graph section.\n";
304        });
305        continue;
306      }
307    }
308
309    IndexToSymbol[SymbolIndex] =
310        &createNormalizedSymbol(*Name, Value, Type, Sect, Desc,
311                                getLinkage(Desc), getScope(*Name, Type));
312  }
313
314  return Error::success();
315}
316
317void MachOLinkGraphBuilder::addSectionStartSymAndBlock(
318    Section &GraphSec, uint64_t Address, const char *Data, uint64_t Size,
319    uint32_t Alignment, bool IsLive) {
320  Block &B =
321      Data ? G->createContentBlock(GraphSec, ArrayRef<char>(Data, Size),
322                                   Address, Alignment, 0)
323           : G->createZeroFillBlock(GraphSec, Size, Address, Alignment, 0);
324  auto &Sym = G->addAnonymousSymbol(B, 0, Size, false, IsLive);
325  assert(!AddrToCanonicalSymbol.count(Sym.getAddress()) &&
326         "Anonymous block start symbol clashes with existing symbol address");
327  AddrToCanonicalSymbol[Sym.getAddress()] = &Sym;
328}
329
330Error MachOLinkGraphBuilder::graphifyRegularSymbols() {
331
332  LLVM_DEBUG(dbgs() << "Creating graph symbols...\n");
333
334  /// We only have 256 section indexes: Use a vector rather than a map.
335  std::vector<std::vector<NormalizedSymbol *>> SecIndexToSymbols;
336  SecIndexToSymbols.resize(256);
337
338  // Create commons, externs, and absolutes, and partition all other symbols by
339  // section.
340  for (auto &KV : IndexToSymbol) {
341    auto &NSym = *KV.second;
342
343    switch (NSym.Type & MachO::N_TYPE) {
344    case MachO::N_UNDF:
345      if (NSym.Value) {
346        if (!NSym.Name)
347          return make_error<JITLinkError>("Anonymous common symbol at index " +
348                                          Twine(KV.first));
349        NSym.GraphSymbol = &G->addCommonSymbol(
350            *NSym.Name, NSym.S, getCommonSection(), 0, NSym.Value,
351            1ull << MachO::GET_COMM_ALIGN(NSym.Desc),
352            NSym.Desc & MachO::N_NO_DEAD_STRIP);
353      } else {
354        if (!NSym.Name)
355          return make_error<JITLinkError>("Anonymous external symbol at "
356                                          "index " +
357                                          Twine(KV.first));
358        NSym.GraphSymbol = &G->addExternalSymbol(
359            *NSym.Name, 0,
360            NSym.Desc & MachO::N_WEAK_REF ? Linkage::Weak : Linkage::Strong);
361      }
362      break;
363    case MachO::N_ABS:
364      if (!NSym.Name)
365        return make_error<JITLinkError>("Anonymous absolute symbol at index " +
366                                        Twine(KV.first));
367      NSym.GraphSymbol = &G->addAbsoluteSymbol(
368          *NSym.Name, NSym.Value, 0, Linkage::Strong, Scope::Default,
369          NSym.Desc & MachO::N_NO_DEAD_STRIP);
370      break;
371    case MachO::N_SECT:
372      SecIndexToSymbols[NSym.Sect - 1].push_back(&NSym);
373      break;
374    case MachO::N_PBUD:
375      return make_error<JITLinkError>(
376          "Unupported N_PBUD symbol " +
377          (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) +
378          " at index " + Twine(KV.first));
379    case MachO::N_INDR:
380      return make_error<JITLinkError>(
381          "Unupported N_INDR symbol " +
382          (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) +
383          " at index " + Twine(KV.first));
384    default:
385      return make_error<JITLinkError>(
386          "Unrecognized symbol type " + Twine(NSym.Type & MachO::N_TYPE) +
387          " for symbol " +
388          (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) +
389          " at index " + Twine(KV.first));
390    }
391  }
392
393  // Loop over sections performing regular graphification for those that
394  // don't have custom parsers.
395  for (auto &KV : IndexToSection) {
396    auto SecIndex = KV.first;
397    auto &NSec = KV.second;
398
399    if (!NSec.GraphSection) {
400      LLVM_DEBUG({
401        dbgs() << "  " << NSec.SegName << "/" << NSec.SectName
402               << " has no graph section. Skipping.\n";
403      });
404      continue;
405    }
406
407    // Skip sections with custom parsers.
408    if (CustomSectionParserFunctions.count(NSec.GraphSection->getName())) {
409      LLVM_DEBUG({
410        dbgs() << "  Skipping section " << NSec.GraphSection->getName()
411               << " as it has a custom parser.\n";
412      });
413      continue;
414    } else
415      LLVM_DEBUG({
416        dbgs() << "  Processing section " << NSec.GraphSection->getName()
417               << "...\n";
418      });
419
420    bool SectionIsNoDeadStrip = NSec.Flags & MachO::S_ATTR_NO_DEAD_STRIP;
421    bool SectionIsText = NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS;
422
423    auto &SecNSymStack = SecIndexToSymbols[SecIndex];
424
425    // If this section is non-empty but there are no symbols covering it then
426    // create one block and anonymous symbol to cover the entire section.
427    if (SecNSymStack.empty()) {
428      if (NSec.Size > 0) {
429        LLVM_DEBUG({
430          dbgs() << "    Section non-empty, but contains no symbols. "
431                    "Creating anonymous block to cover "
432                 << formatv("{0:x16}", NSec.Address) << " -- "
433                 << formatv("{0:x16}", NSec.Address + NSec.Size) << "\n";
434        });
435        addSectionStartSymAndBlock(*NSec.GraphSection, NSec.Address, NSec.Data,
436                                   NSec.Size, NSec.Alignment,
437                                   SectionIsNoDeadStrip);
438      } else
439        LLVM_DEBUG({
440          dbgs() << "    Section empty and contains no symbols. Skipping.\n";
441        });
442      continue;
443    }
444
445    // Sort the symbol stack in by address, alt-entry status, scope, and name.
446    // We sort in reverse order so that symbols will be visited in the right
447    // order when we pop off the stack below.
448    llvm::sort(SecNSymStack, [](const NormalizedSymbol *LHS,
449                                const NormalizedSymbol *RHS) {
450      if (LHS->Value != RHS->Value)
451        return LHS->Value > RHS->Value;
452      if (isAltEntry(*LHS) != isAltEntry(*RHS))
453        return isAltEntry(*RHS);
454      if (LHS->S != RHS->S)
455        return static_cast<uint8_t>(LHS->S) < static_cast<uint8_t>(RHS->S);
456      return LHS->Name < RHS->Name;
457    });
458
459    // The first symbol in a section can not be an alt-entry symbol.
460    if (!SecNSymStack.empty() && isAltEntry(*SecNSymStack.back()))
461      return make_error<JITLinkError>(
462          "First symbol in " + NSec.GraphSection->getName() + " is alt-entry");
463
464    // If the section is non-empty but there is no symbol covering the start
465    // address then add an anonymous one.
466    if (SecNSymStack.back()->Value != NSec.Address) {
467      auto AnonBlockSize = SecNSymStack.back()->Value - NSec.Address;
468      LLVM_DEBUG({
469        dbgs() << "    Section start not covered by symbol. "
470               << "Creating anonymous block to cover [ "
471               << formatv("{0:x16}", NSec.Address) << " -- "
472               << formatv("{0:x16}", NSec.Address + AnonBlockSize) << " ]\n";
473      });
474      addSectionStartSymAndBlock(*NSec.GraphSection, NSec.Address, NSec.Data,
475                                 AnonBlockSize, NSec.Alignment,
476                                 SectionIsNoDeadStrip);
477    }
478
479    // Visit section symbols in order by popping off the reverse-sorted stack,
480    // building blocks for each alt-entry chain and creating symbols as we go.
481    while (!SecNSymStack.empty()) {
482      SmallVector<NormalizedSymbol *, 8> BlockSyms;
483
484      BlockSyms.push_back(SecNSymStack.back());
485      SecNSymStack.pop_back();
486      while (!SecNSymStack.empty() &&
487             (isAltEntry(*SecNSymStack.back()) ||
488              SecNSymStack.back()->Value == BlockSyms.back()->Value)) {
489        BlockSyms.push_back(SecNSymStack.back());
490        SecNSymStack.pop_back();
491      }
492
493      // BlockNSyms now contains the block symbols in reverse canonical order.
494      JITTargetAddress BlockStart = BlockSyms.front()->Value;
495      JITTargetAddress BlockEnd = SecNSymStack.empty()
496                                      ? NSec.Address + NSec.Size
497                                      : SecNSymStack.back()->Value;
498      JITTargetAddress BlockOffset = BlockStart - NSec.Address;
499      JITTargetAddress BlockSize = BlockEnd - BlockStart;
500
501      LLVM_DEBUG({
502        dbgs() << "    Creating block for " << formatv("{0:x16}", BlockStart)
503               << " -- " << formatv("{0:x16}", BlockEnd) << ": "
504               << NSec.GraphSection->getName() << " + "
505               << formatv("{0:x16}", BlockOffset) << " with "
506               << BlockSyms.size() << " symbol(s)...\n";
507      });
508
509      Block &B =
510          NSec.Data
511              ? G->createContentBlock(
512                    *NSec.GraphSection,
513                    ArrayRef<char>(NSec.Data + BlockOffset, BlockSize),
514                    BlockStart, NSec.Alignment, BlockStart % NSec.Alignment)
515              : G->createZeroFillBlock(*NSec.GraphSection, BlockSize,
516                                       BlockStart, NSec.Alignment,
517                                       BlockStart % NSec.Alignment);
518
519      Optional<JITTargetAddress> LastCanonicalAddr;
520      JITTargetAddress SymEnd = BlockEnd;
521      while (!BlockSyms.empty()) {
522        auto &NSym = *BlockSyms.back();
523        BlockSyms.pop_back();
524
525        bool SymLive =
526            (NSym.Desc & MachO::N_NO_DEAD_STRIP) || SectionIsNoDeadStrip;
527
528        LLVM_DEBUG({
529          dbgs() << "      " << formatv("{0:x16}", NSym.Value) << " -- "
530                 << formatv("{0:x16}", SymEnd) << ": ";
531          if (!NSym.Name)
532            dbgs() << "<anonymous symbol>";
533          else
534            dbgs() << NSym.Name;
535          if (SymLive)
536            dbgs() << " [no-dead-strip]";
537          if (LastCanonicalAddr == NSym.Value)
538            dbgs() << " [non-canonical]";
539          dbgs() << "\n";
540        });
541
542        auto &Sym =
543            NSym.Name
544                ? G->addDefinedSymbol(B, NSym.Value - BlockStart, *NSym.Name,
545                                      SymEnd - NSym.Value, NSym.L, NSym.S,
546                                      SectionIsText, SymLive)
547                : G->addAnonymousSymbol(B, NSym.Value - BlockStart,
548                                        SymEnd - NSym.Value, SectionIsText,
549                                        SymLive);
550        NSym.GraphSymbol = &Sym;
551        if (LastCanonicalAddr != Sym.getAddress()) {
552          if (LastCanonicalAddr)
553            SymEnd = *LastCanonicalAddr;
554          LastCanonicalAddr = Sym.getAddress();
555          setCanonicalSymbol(Sym);
556        }
557      }
558    }
559  }
560
561  return Error::success();
562}
563
564Error MachOLinkGraphBuilder::graphifySectionsWithCustomParsers() {
565  // Graphify special sections.
566  for (auto &KV : IndexToSection) {
567    auto &NSec = KV.second;
568
569    // Skip non-graph sections.
570    if (!NSec.GraphSection)
571      continue;
572
573    auto HI = CustomSectionParserFunctions.find(NSec.GraphSection->getName());
574    if (HI != CustomSectionParserFunctions.end()) {
575      auto &Parse = HI->second;
576      if (auto Err = Parse(NSec))
577        return Err;
578    }
579  }
580
581  return Error::success();
582}
583
584} // end namespace jitlink
585} // end namespace llvm
586