1//===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "obj2yaml.h"
10#include "llvm/DebugInfo/DWARF/DWARFContext.h"
11#include "llvm/Object/MachOUniversal.h"
12#include "llvm/ObjectYAML/DWARFYAML.h"
13#include "llvm/ObjectYAML/ObjectYAML.h"
14#include "llvm/Support/Errc.h"
15#include "llvm/Support/Error.h"
16#include "llvm/Support/ErrorHandling.h"
17#include "llvm/Support/LEB128.h"
18
19#include <string.h> // for memcpy
20
21using namespace llvm;
22
23class MachODumper {
24
25  template <typename StructType>
26  Expected<const char *> processLoadCommandData(
27      MachOYAML::LoadCommand &LC,
28      const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
29      MachOYAML::Object &Y);
30
31  const object::MachOObjectFile &Obj;
32  std::unique_ptr<DWARFContext> DWARFCtx;
33  unsigned RawSegment;
34  void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y);
35  Error dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y);
36  void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y);
37  void dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y);
38  void dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> &Y);
39  void dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> &BindOpcodes,
40                       ArrayRef<uint8_t> OpcodeBuffer, bool Lazy = false);
41  void dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y);
42  void dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y);
43  void dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y);
44  void dumpChainedFixups(std::unique_ptr<MachOYAML::Object> &Y);
45  void dumpDataInCode(std::unique_ptr<MachOYAML::Object> &Y);
46
47  template <typename SectionType>
48  Expected<MachOYAML::Section> constructSectionCommon(SectionType Sec,
49                                                      size_t SecIndex);
50  template <typename SectionType>
51  Expected<MachOYAML::Section> constructSection(SectionType Sec,
52                                                size_t SecIndex);
53  template <typename SectionType, typename SegmentType>
54  Expected<const char *>
55  extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
56                  std::vector<MachOYAML::Section> &Sections,
57                  MachOYAML::Object &Y);
58
59public:
60  MachODumper(const object::MachOObjectFile &O,
61              std::unique_ptr<DWARFContext> DCtx, unsigned RawSegments)
62      : Obj(O), DWARFCtx(std::move(DCtx)), RawSegment(RawSegments) {}
63  Expected<std::unique_ptr<MachOYAML::Object>> dump();
64};
65
// Expands to one `case` of the load-command switch in
// MachODumper::dumpLoadCommands(), which pulls in the expansions by including
// MachO.def inside the switch. The expansion relies on the following names
// being in scope at the point of use: LC, LoadCmd, EndPtr, Y and Obj.
//
// Each case copies the fixed-size command struct out of the file, byte-swaps
// it when the file and host endianness differ, and then lets
// processLoadCommandData() consume whatever trails the struct.
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
  case MachO::LCName: {                                                        \
    memcpy((void *)&(LC.Data.LCStruct##_data), LoadCmd.Ptr,                    \
           sizeof(MachO::LCStruct));                                           \
    if (Obj.isLittleEndian() != sys::IsLittleEndianHost)                       \
      MachO::swapStruct(LC.Data.LCStruct##_data);                              \
    Expected<const char *> ExpectedEndPtr =                                    \
        processLoadCommandData<MachO::LCStruct>(LC, LoadCmd, *Y);              \
    if (!ExpectedEndPtr)                                                       \
      return ExpectedEndPtr.takeError();                                       \
    EndPtr = *ExpectedEndPtr;                                                  \
    break;                                                                     \
  }
78
79template <typename SectionType>
80Expected<MachOYAML::Section>
81MachODumper::constructSectionCommon(SectionType Sec, size_t SecIndex) {
82  MachOYAML::Section TempSec;
83  memcpy(reinterpret_cast<void *>(&TempSec.sectname[0]), &Sec.sectname[0], 16);
84  memcpy(reinterpret_cast<void *>(&TempSec.segname[0]), &Sec.segname[0], 16);
85  TempSec.addr = Sec.addr;
86  TempSec.size = Sec.size;
87  TempSec.offset = Sec.offset;
88  TempSec.align = Sec.align;
89  TempSec.reloff = Sec.reloff;
90  TempSec.nreloc = Sec.nreloc;
91  TempSec.flags = Sec.flags;
92  TempSec.reserved1 = Sec.reserved1;
93  TempSec.reserved2 = Sec.reserved2;
94  TempSec.reserved3 = 0;
95  if (!MachO::isVirtualSection(Sec.flags & MachO::SECTION_TYPE))
96    TempSec.content =
97        yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size));
98
99  if (Expected<object::SectionRef> SecRef = Obj.getSection(SecIndex)) {
100    TempSec.relocations.reserve(TempSec.nreloc);
101    for (const object::RelocationRef &Reloc : SecRef->relocations()) {
102      const object::DataRefImpl Rel = Reloc.getRawDataRefImpl();
103      const MachO::any_relocation_info RE = Obj.getRelocation(Rel);
104      MachOYAML::Relocation R;
105      R.address = Obj.getAnyRelocationAddress(RE);
106      R.is_pcrel = Obj.getAnyRelocationPCRel(RE);
107      R.length = Obj.getAnyRelocationLength(RE);
108      R.type = Obj.getAnyRelocationType(RE);
109      R.is_scattered = Obj.isRelocationScattered(RE);
110      R.symbolnum = (R.is_scattered ? 0 : Obj.getPlainRelocationSymbolNum(RE));
111      R.is_extern =
112          (R.is_scattered ? false : Obj.getPlainRelocationExternal(RE));
113      R.value = (R.is_scattered ? Obj.getScatteredRelocationValue(RE) : 0);
114      TempSec.relocations.push_back(R);
115    }
116  } else {
117    return SecRef.takeError();
118  }
119  return TempSec;
120}
121
122template <>
123Expected<MachOYAML::Section> MachODumper::constructSection(MachO::section Sec,
124                                                           size_t SecIndex) {
125  Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex);
126  if (TempSec)
127    TempSec->reserved3 = 0;
128  return TempSec;
129}
130
131template <>
132Expected<MachOYAML::Section>
133MachODumper::constructSection(MachO::section_64 Sec, size_t SecIndex) {
134  Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex);
135  if (TempSec)
136    TempSec->reserved3 = Sec.reserved3;
137  return TempSec;
138}
139
140static Error dumpDebugSection(StringRef SecName, DWARFContext &DCtx,
141                              DWARFYAML::Data &DWARF) {
142  if (SecName == "__debug_abbrev") {
143    dumpDebugAbbrev(DCtx, DWARF);
144    return Error::success();
145  }
146  if (SecName == "__debug_aranges")
147    return dumpDebugARanges(DCtx, DWARF);
148  if (SecName == "__debug_info") {
149    dumpDebugInfo(DCtx, DWARF);
150    return Error::success();
151  }
152  if (SecName == "__debug_line") {
153    dumpDebugLines(DCtx, DWARF);
154    return Error::success();
155  }
156  if (SecName.startswith("__debug_pub")) {
157    // FIXME: We should extract pub-section dumpers from this function.
158    dumpDebugPubSections(DCtx, DWARF);
159    return Error::success();
160  }
161  if (SecName == "__debug_ranges")
162    return dumpDebugRanges(DCtx, DWARF);
163  if (SecName == "__debug_str")
164    return dumpDebugStrings(DCtx, DWARF);
165  return createStringError(errc::not_supported,
166                           "dumping " + SecName + " section is not supported");
167}
168
169template <typename SectionType, typename SegmentType>
170Expected<const char *> MachODumper::extractSections(
171    const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
172    std::vector<MachOYAML::Section> &Sections, MachOYAML::Object &Y) {
173  auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize;
174  const SectionType *Curr =
175      reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType));
176  for (; reinterpret_cast<const void *>(Curr) < End; Curr++) {
177    SectionType Sec;
178    memcpy((void *)&Sec, Curr, sizeof(SectionType));
179    if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
180      MachO::swapStruct(Sec);
181    // For MachO section indices start from 1.
182    if (Expected<MachOYAML::Section> S =
183            constructSection(Sec, Sections.size() + 1)) {
184      StringRef SecName(S->sectname);
185
186      // Copy data sections if requested.
187      if ((RawSegment & ::RawSegments::data) &&
188          StringRef(S->segname).startswith("__DATA"))
189        S->content =
190            yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size));
191
192      if (SecName.startswith("__debug_")) {
193        // If the DWARF section cannot be successfully parsed, emit raw content
194        // instead of an entry in the DWARF section of the YAML.
195        if (Error Err = dumpDebugSection(SecName, *DWARFCtx, Y.DWARF))
196          consumeError(std::move(Err));
197        else
198          S->content.reset();
199      }
200      Sections.push_back(std::move(*S));
201    } else
202      return S.takeError();
203  }
204  return reinterpret_cast<const char *>(Curr);
205}
206
207template <typename StructType>
208Expected<const char *> MachODumper::processLoadCommandData(
209    MachOYAML::LoadCommand &LC,
210    const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
211    MachOYAML::Object &Y) {
212  return LoadCmd.Ptr + sizeof(StructType);
213}
214
215template <>
216Expected<const char *>
217MachODumper::processLoadCommandData<MachO::segment_command>(
218    MachOYAML::LoadCommand &LC,
219    const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
220    MachOYAML::Object &Y) {
221  return extractSections<MachO::section, MachO::segment_command>(
222      LoadCmd, LC.Sections, Y);
223}
224
225template <>
226Expected<const char *>
227MachODumper::processLoadCommandData<MachO::segment_command_64>(
228    MachOYAML::LoadCommand &LC,
229    const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
230    MachOYAML::Object &Y) {
231  return extractSections<MachO::section_64, MachO::segment_command_64>(
232      LoadCmd, LC.Sections, Y);
233}
234
235template <typename StructType>
236const char *
237readString(MachOYAML::LoadCommand &LC,
238           const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
239  auto Start = LoadCmd.Ptr + sizeof(StructType);
240  auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType);
241  auto Size = strnlen(Start, MaxSize);
242  LC.Content = StringRef(Start, Size).str();
243  return Start + Size;
244}
245
246template <>
247Expected<const char *>
248MachODumper::processLoadCommandData<MachO::dylib_command>(
249    MachOYAML::LoadCommand &LC,
250    const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
251    MachOYAML::Object &Y) {
252  return readString<MachO::dylib_command>(LC, LoadCmd);
253}
254
255template <>
256Expected<const char *>
257MachODumper::processLoadCommandData<MachO::dylinker_command>(
258    MachOYAML::LoadCommand &LC,
259    const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
260    MachOYAML::Object &Y) {
261  return readString<MachO::dylinker_command>(LC, LoadCmd);
262}
263
264template <>
265Expected<const char *>
266MachODumper::processLoadCommandData<MachO::rpath_command>(
267    MachOYAML::LoadCommand &LC,
268    const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
269    MachOYAML::Object &Y) {
270  return readString<MachO::rpath_command>(LC, LoadCmd);
271}
272
273template <>
274Expected<const char *>
275MachODumper::processLoadCommandData<MachO::build_version_command>(
276    MachOYAML::LoadCommand &LC,
277    const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
278    MachOYAML::Object &Y) {
279  auto Start = LoadCmd.Ptr + sizeof(MachO::build_version_command);
280  auto NTools = LC.Data.build_version_command_data.ntools;
281  for (unsigned i = 0; i < NTools; ++i) {
282    auto Curr = Start + i * sizeof(MachO::build_tool_version);
283    MachO::build_tool_version BV;
284    memcpy((void *)&BV, Curr, sizeof(MachO::build_tool_version));
285    if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
286      MachO::swapStruct(BV);
287    LC.Tools.push_back(BV);
288  }
289  return Start + NTools * sizeof(MachO::build_tool_version);
290}
291
292Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() {
293  auto Y = std::make_unique<MachOYAML::Object>();
294  Y->IsLittleEndian = Obj.isLittleEndian();
295  dumpHeader(Y);
296  if (Error Err = dumpLoadCommands(Y))
297    return std::move(Err);
298  if (RawSegment & ::RawSegments::linkedit)
299    Y->RawLinkEditSegment =
300        yaml::BinaryRef(Obj.getSegmentContents("__LINKEDIT"));
301  else
302    dumpLinkEdit(Y);
303
304  return std::move(Y);
305}
306
307void MachODumper::dumpHeader(std::unique_ptr<MachOYAML::Object> &Y) {
308  Y->Header.magic = Obj.getHeader().magic;
309  Y->Header.cputype = Obj.getHeader().cputype;
310  Y->Header.cpusubtype = Obj.getHeader().cpusubtype;
311  Y->Header.filetype = Obj.getHeader().filetype;
312  Y->Header.ncmds = Obj.getHeader().ncmds;
313  Y->Header.sizeofcmds = Obj.getHeader().sizeofcmds;
314  Y->Header.flags = Obj.getHeader().flags;
315  Y->Header.reserved = 0;
316}
317
318Error MachODumper::dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y) {
319  for (auto LoadCmd : Obj.load_commands()) {
320    MachOYAML::LoadCommand LC;
321    const char *EndPtr = LoadCmd.Ptr;
322    switch (LoadCmd.C.cmd) {
323    default:
324      memcpy((void *)&(LC.Data.load_command_data), LoadCmd.Ptr,
325             sizeof(MachO::load_command));
326      if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
327        MachO::swapStruct(LC.Data.load_command_data);
328      if (Expected<const char *> ExpectedEndPtr =
329              processLoadCommandData<MachO::load_command>(LC, LoadCmd, *Y))
330        EndPtr = *ExpectedEndPtr;
331      else
332        return ExpectedEndPtr.takeError();
333      break;
334#include "llvm/BinaryFormat/MachO.def"
335    }
336    auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr);
337    if (!std::all_of(EndPtr, &EndPtr[RemainingBytes],
338                     [](const char C) { return C == 0; })) {
339      LC.PayloadBytes.insert(LC.PayloadBytes.end(), EndPtr,
340                             &EndPtr[RemainingBytes]);
341      RemainingBytes = 0;
342    }
343    LC.ZeroPadBytes = RemainingBytes;
344    Y->LoadCommands.push_back(std::move(LC));
345  }
346  return Error::success();
347}
348
349void MachODumper::dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y) {
350  dumpRebaseOpcodes(Y);
351  dumpBindOpcodes(Y->LinkEdit.BindOpcodes, Obj.getDyldInfoBindOpcodes());
352  dumpBindOpcodes(Y->LinkEdit.WeakBindOpcodes,
353                  Obj.getDyldInfoWeakBindOpcodes());
354  dumpBindOpcodes(Y->LinkEdit.LazyBindOpcodes, Obj.getDyldInfoLazyBindOpcodes(),
355                  true);
356  dumpExportTrie(Y);
357  dumpSymbols(Y);
358  dumpIndirectSymbols(Y);
359  dumpFunctionStarts(Y);
360  dumpChainedFixups(Y);
361  dumpDataInCode(Y);
362}
363
364void MachODumper::dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> &Y) {
365  MachOYAML::LinkEditData &LEData = Y->LinkEdit;
366
367  auto FunctionStarts = Obj.getFunctionStarts();
368  for (auto Addr : FunctionStarts)
369    LEData.FunctionStarts.push_back(Addr);
370}
371
372void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) {
373  MachOYAML::LinkEditData &LEData = Y->LinkEdit;
374
375  auto RebaseOpcodes = Obj.getDyldInfoRebaseOpcodes();
376  for (auto OpCode = RebaseOpcodes.begin(); OpCode != RebaseOpcodes.end();
377       ++OpCode) {
378    MachOYAML::RebaseOpcode RebaseOp;
379    RebaseOp.Opcode =
380        static_cast<MachO::RebaseOpcode>(*OpCode & MachO::REBASE_OPCODE_MASK);
381    RebaseOp.Imm = *OpCode & MachO::REBASE_IMMEDIATE_MASK;
382
383    unsigned Count;
384    uint64_t ULEB = 0;
385
386    switch (RebaseOp.Opcode) {
387    case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
388
389      ULEB = decodeULEB128(OpCode + 1, &Count);
390      RebaseOp.ExtraData.push_back(ULEB);
391      OpCode += Count;
392      [[fallthrough]];
393    // Intentionally no break here -- This opcode has two ULEB values
394    case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
395    case MachO::REBASE_OPCODE_ADD_ADDR_ULEB:
396    case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
397    case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
398
399      ULEB = decodeULEB128(OpCode + 1, &Count);
400      RebaseOp.ExtraData.push_back(ULEB);
401      OpCode += Count;
402      break;
403    default:
404      break;
405    }
406
407    LEData.RebaseOpcodes.push_back(RebaseOp);
408
409    if (RebaseOp.Opcode == MachO::REBASE_OPCODE_DONE)
410      break;
411  }
412}
413
414StringRef ReadStringRef(const uint8_t *Start) {
415  const uint8_t *Itr = Start;
416  for (; *Itr; ++Itr)
417    ;
418  return StringRef(reinterpret_cast<const char *>(Start), Itr - Start);
419}
420
421void MachODumper::dumpBindOpcodes(
422    std::vector<MachOYAML::BindOpcode> &BindOpcodes,
423    ArrayRef<uint8_t> OpcodeBuffer, bool Lazy) {
424  for (auto OpCode = OpcodeBuffer.begin(); OpCode != OpcodeBuffer.end();
425       ++OpCode) {
426    MachOYAML::BindOpcode BindOp;
427    BindOp.Opcode =
428        static_cast<MachO::BindOpcode>(*OpCode & MachO::BIND_OPCODE_MASK);
429    BindOp.Imm = *OpCode & MachO::BIND_IMMEDIATE_MASK;
430
431    unsigned Count;
432    uint64_t ULEB = 0;
433    int64_t SLEB = 0;
434
435    switch (BindOp.Opcode) {
436    case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
437      ULEB = decodeULEB128(OpCode + 1, &Count);
438      BindOp.ULEBExtraData.push_back(ULEB);
439      OpCode += Count;
440      [[fallthrough]];
441    // Intentionally no break here -- this opcode has two ULEB values
442
443    case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
444    case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
445    case MachO::BIND_OPCODE_ADD_ADDR_ULEB:
446    case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
447      ULEB = decodeULEB128(OpCode + 1, &Count);
448      BindOp.ULEBExtraData.push_back(ULEB);
449      OpCode += Count;
450      break;
451
452    case MachO::BIND_OPCODE_SET_ADDEND_SLEB:
453      SLEB = decodeSLEB128(OpCode + 1, &Count);
454      BindOp.SLEBExtraData.push_back(SLEB);
455      OpCode += Count;
456      break;
457
458    case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
459      BindOp.Symbol = ReadStringRef(OpCode + 1);
460      OpCode += BindOp.Symbol.size() + 1;
461      break;
462    default:
463      break;
464    }
465
466    BindOpcodes.push_back(BindOp);
467
468    // Lazy bindings have DONE opcodes between operations, so we need to keep
469    // processing after a DONE.
470    if (!Lazy && BindOp.Opcode == MachO::BIND_OPCODE_DONE)
471      break;
472  }
473}
474
475/*!
 * \brief processes a node from the export trie, and its children.
477 *
478 * To my knowledge there is no documentation of the encoded format of this data
479 * other than in the heads of the Apple linker engineers. To that end hopefully
480 * this comment and the implementation below can serve to light the way for
481 * anyone crazy enough to come down this path in the future.
482 *
483 * This function reads and preserves the trie structure of the export trie. To
484 * my knowledge there is no code anywhere else that reads the data and preserves
485 * the Trie. LD64 (sources available at opensource.apple.com) has a similar
486 * implementation that parses the export trie into a vector. That code as well
487 * as LLVM's libObject MachO implementation were the basis for this.
488 *
489 * The export trie is an encoded trie. The node serialization is a bit awkward.
490 * The below pseudo-code is the best description I've come up with for it.
491 *
492 * struct SerializedNode {
493 *   ULEB128 TerminalSize;
494 *   struct TerminalData { <-- This is only present if TerminalSize > 0
495 *     ULEB128 Flags;
496 *     ULEB128 Address; <-- Present if (! Flags & REEXPORT )
497 *     ULEB128 Other; <-- Present if ( Flags & REEXPORT ||
498 *                                     Flags & STUB_AND_RESOLVER )
499 *     char[] ImportName; <-- Present if ( Flags & REEXPORT )
500 *   }
501 *   uint8_t ChildrenCount;
502 *   Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount];
503 *   SerializedNode Children[ChildrenCount]
504 * }
505 *
506 * Terminal nodes are nodes that represent actual exports. They can appear
507 * anywhere in the tree other than at the root; they do not need to be leaf
508 * nodes. When reading the data out of the trie this routine reads it in-order,
509 * but it puts the child names and offsets directly into the child nodes. This
510 * results in looping over the children twice during serialization and
511 * de-serialization, but it makes the YAML representation more human readable.
512 *
513 * Below is an example of the graph from a "Hello World" executable:
514 *
515 * -------
516 * | ''  |
517 * -------
518 *    |
519 * -------
520 * | '_' |
521 * -------
522 *    |
523 *    |----------------------------------------|
524 *    |                                        |
525 *  ------------------------      ---------------------
526 *  | '_mh_execute_header' |      | 'main'            |
527 *  | Flags: 0x00000000    |      | Flags: 0x00000000 |
528 *  | Addr:  0x00000000    |      | Addr:  0x00001160 |
529 *  ------------------------      ---------------------
530 *
531 * This graph represents the trie for the exports "__mh_execute_header" and
532 * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are
533 * terminal.
534*/
535
536const uint8_t *processExportNode(const uint8_t *CurrPtr,
537                                 const uint8_t *const End,
538                                 MachOYAML::ExportEntry &Entry) {
539  if (CurrPtr >= End)
540    return CurrPtr;
541  unsigned Count = 0;
542  Entry.TerminalSize = decodeULEB128(CurrPtr, &Count);
543  CurrPtr += Count;
544  if (Entry.TerminalSize != 0) {
545    Entry.Flags = decodeULEB128(CurrPtr, &Count);
546    CurrPtr += Count;
547    if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) {
548      Entry.Address = 0;
549      Entry.Other = decodeULEB128(CurrPtr, &Count);
550      CurrPtr += Count;
551      Entry.ImportName = std::string(reinterpret_cast<const char *>(CurrPtr));
552    } else {
553      Entry.Address = decodeULEB128(CurrPtr, &Count);
554      CurrPtr += Count;
555      if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
556        Entry.Other = decodeULEB128(CurrPtr, &Count);
557        CurrPtr += Count;
558      } else
559        Entry.Other = 0;
560    }
561  }
562  uint8_t childrenCount = *CurrPtr++;
563  if (childrenCount == 0)
564    return CurrPtr;
565
566  Entry.Children.insert(Entry.Children.begin(), (size_t)childrenCount,
567                        MachOYAML::ExportEntry());
568  for (auto &Child : Entry.Children) {
569    Child.Name = std::string(reinterpret_cast<const char *>(CurrPtr));
570    CurrPtr += Child.Name.length() + 1;
571    Child.NodeOffset = decodeULEB128(CurrPtr, &Count);
572    CurrPtr += Count;
573  }
574  for (auto &Child : Entry.Children) {
575    CurrPtr = processExportNode(CurrPtr, End, Child);
576  }
577  return CurrPtr;
578}
579
580void MachODumper::dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y) {
581  MachOYAML::LinkEditData &LEData = Y->LinkEdit;
582  // The exports trie can be in LC_DYLD_INFO or LC_DYLD_EXPORTS_TRIE
583  auto ExportsTrie = Obj.getDyldInfoExportsTrie();
584  if (ExportsTrie.empty())
585    ExportsTrie = Obj.getDyldExportsTrie();
586  processExportNode(ExportsTrie.begin(), ExportsTrie.end(), LEData.ExportTrie);
587}
588
589template <typename nlist_t>
590MachOYAML::NListEntry constructNameList(const nlist_t &nlist) {
591  MachOYAML::NListEntry NL;
592  NL.n_strx = nlist.n_strx;
593  NL.n_type = nlist.n_type;
594  NL.n_sect = nlist.n_sect;
595  NL.n_desc = nlist.n_desc;
596  NL.n_value = nlist.n_value;
597  return NL;
598}
599
600void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
601  MachOYAML::LinkEditData &LEData = Y->LinkEdit;
602
603  for (auto Symbol : Obj.symbols()) {
604    MachOYAML::NListEntry NLE =
605        Obj.is64Bit()
606            ? constructNameList<MachO::nlist_64>(
607                  Obj.getSymbol64TableEntry(Symbol.getRawDataRefImpl()))
608            : constructNameList<MachO::nlist>(
609                  Obj.getSymbolTableEntry(Symbol.getRawDataRefImpl()));
610    LEData.NameList.push_back(NLE);
611  }
612
613  StringRef RemainingTable = Obj.getStringTableData();
614  while (RemainingTable.size() > 0) {
615    auto SymbolPair = RemainingTable.split('\0');
616    RemainingTable = SymbolPair.second;
617    LEData.StringTable.push_back(SymbolPair.first);
618  }
619}
620
621void MachODumper::dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
622  MachOYAML::LinkEditData &LEData = Y->LinkEdit;
623
624  MachO::dysymtab_command DLC = Obj.getDysymtabLoadCommand();
625  for (unsigned i = 0; i < DLC.nindirectsyms; ++i)
626    LEData.IndirectSymbols.push_back(Obj.getIndirectSymbolTableEntry(DLC, i));
627}
628
629void MachODumper::dumpChainedFixups(std::unique_ptr<MachOYAML::Object> &Y) {
630  MachOYAML::LinkEditData &LEData = Y->LinkEdit;
631
632  for (const auto &LC : Y->LoadCommands) {
633    if (LC.Data.load_command_data.cmd == llvm::MachO::LC_DYLD_CHAINED_FIXUPS) {
634      const MachO::linkedit_data_command &DC =
635          LC.Data.linkedit_data_command_data;
636      if (DC.dataoff) {
637        assert(DC.dataoff < Obj.getData().size());
638        assert(DC.dataoff + DC.datasize <= Obj.getData().size());
639        const char *Bytes = Obj.getData().data() + DC.dataoff;
640        for (size_t Idx = 0; Idx < DC.datasize; Idx++) {
641          LEData.ChainedFixups.push_back(Bytes[Idx]);
642        }
643      }
644      break;
645    }
646  }
647}
648
649void MachODumper::dumpDataInCode(std::unique_ptr<MachOYAML::Object> &Y) {
650  MachOYAML::LinkEditData &LEData = Y->LinkEdit;
651
652  MachO::linkedit_data_command DIC = Obj.getDataInCodeLoadCommand();
653  uint32_t NumEntries = DIC.datasize / sizeof(MachO::data_in_code_entry);
654  for (uint32_t Idx = 0; Idx < NumEntries; ++Idx) {
655    MachO::data_in_code_entry DICE =
656        Obj.getDataInCodeTableEntry(DIC.dataoff, Idx);
657    MachOYAML::DataInCodeEntry Entry{DICE.offset, DICE.length, DICE.kind};
658    LEData.DataInCode.emplace_back(Entry);
659  }
660}
661
662Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj,
663                 unsigned RawSegments) {
664  std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj);
665  MachODumper Dumper(Obj, std::move(DCtx), RawSegments);
666  Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump();
667  if (!YAML)
668    return YAML.takeError();
669
670  yaml::YamlObjectFile YAMLFile;
671  YAMLFile.MachO = std::move(YAML.get());
672
673  yaml::Output Yout(Out);
674  Yout << YAMLFile;
675  return Error::success();
676}
677
678Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj,
679                 unsigned RawSegments) {
680  yaml::YamlObjectFile YAMLFile;
681  YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary());
682  MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO;
683  YAML.Header.magic = Obj.getMagic();
684  YAML.Header.nfat_arch = Obj.getNumberOfObjects();
685
686  for (auto Slice : Obj.objects()) {
687    MachOYAML::FatArch arch;
688    arch.cputype = Slice.getCPUType();
689    arch.cpusubtype = Slice.getCPUSubType();
690    arch.offset = Slice.getOffset();
691    arch.size = Slice.getSize();
692    arch.align = Slice.getAlign();
693    arch.reserved = Slice.getReserved();
694    YAML.FatArchs.push_back(arch);
695
696    auto SliceObj = Slice.getAsObjectFile();
697    if (!SliceObj)
698      return SliceObj.takeError();
699
700    std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(*SliceObj.get());
701    MachODumper Dumper(*SliceObj.get(), std::move(DCtx), RawSegments);
702    Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump();
703    if (!YAMLObj)
704      return YAMLObj.takeError();
705    YAML.Slices.push_back(*YAMLObj.get());
706  }
707
708  yaml::Output Yout(Out);
709  Yout << YAML;
710  return Error::success();
711}
712
713Error macho2yaml(raw_ostream &Out, const object::Binary &Binary,
714                 unsigned RawSegments) {
715  if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(&Binary))
716    return macho2yaml(Out, *MachOObj, RawSegments);
717
718  if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Binary))
719    return macho2yaml(Out, *MachOObj, RawSegments);
720
721  llvm_unreachable("unexpected Mach-O file format");
722}
723