1//===- MachOReader.cpp ------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "MachOReader.h"
10#include "MachOObject.h"
11#include "llvm/BinaryFormat/MachO.h"
12#include "llvm/Object/MachO.h"
13#include "llvm/Support/Errc.h"
14#include "llvm/Support/SystemZ/zOSSupport.h"
15#include <memory>
16
17using namespace llvm;
18using namespace llvm::objcopy;
19using namespace llvm::objcopy::macho;
20
21void MachOReader::readHeader(Object &O) const {
22  O.Header.Magic = MachOObj.getHeader().magic;
23  O.Header.CPUType = MachOObj.getHeader().cputype;
24  O.Header.CPUSubType = MachOObj.getHeader().cpusubtype;
25  O.Header.FileType = MachOObj.getHeader().filetype;
26  O.Header.NCmds = MachOObj.getHeader().ncmds;
27  O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds;
28  O.Header.Flags = MachOObj.getHeader().flags;
29}
30
31template <typename SectionType>
32static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) {
33  StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname)));
34  StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)));
35  Section S(SegName, SectName);
36  S.Index = Index;
37  S.Addr = Sec.addr;
38  S.Size = Sec.size;
39  S.OriginalOffset = Sec.offset;
40  S.Align = Sec.align;
41  S.RelOff = Sec.reloff;
42  S.NReloc = Sec.nreloc;
43  S.Flags = Sec.flags;
44  S.Reserved1 = Sec.reserved1;
45  S.Reserved2 = Sec.reserved2;
46  S.Reserved3 = 0;
47  return S;
48}
49
50Section constructSection(const MachO::section &Sec, uint32_t Index) {
51  return constructSectionCommon(Sec, Index);
52}
53
54Section constructSection(const MachO::section_64 &Sec, uint32_t Index) {
55  Section S = constructSectionCommon(Sec, Index);
56  S.Reserved3 = Sec.reserved3;
57  return S;
58}
59
60template <typename SectionType, typename SegmentType>
61Expected<std::vector<std::unique_ptr<Section>>> static extractSections(
62    const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
63    const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) {
64  std::vector<std::unique_ptr<Section>> Sections;
65  for (auto Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr +
66                                                         sizeof(SegmentType)),
67            End = reinterpret_cast<const SectionType *>(LoadCmd.Ptr +
68                                                        LoadCmd.C.cmdsize);
69       Curr < End; ++Curr) {
70    SectionType Sec;
71    memcpy((void *)&Sec, reinterpret_cast<const char *>(Curr),
72           sizeof(SectionType));
73
74    if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
75      MachO::swapStruct(Sec);
76
77    Sections.push_back(
78        std::make_unique<Section>(constructSection(Sec, NextSectionIndex)));
79
80    Section &S = *Sections.back();
81
82    Expected<object::SectionRef> SecRef =
83        MachOObj.getSection(NextSectionIndex++);
84    if (!SecRef)
85      return SecRef.takeError();
86
87    Expected<ArrayRef<uint8_t>> Data =
88        MachOObj.getSectionContents(SecRef->getRawDataRefImpl());
89    if (!Data)
90      return Data.takeError();
91
92    S.Content =
93        StringRef(reinterpret_cast<const char *>(Data->data()), Data->size());
94
95    const uint32_t CPUType = MachOObj.getHeader().cputype;
96    S.Relocations.reserve(S.NReloc);
97    for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()),
98              RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl());
99         RI != RE; ++RI) {
100      RelocationInfo R;
101      R.Symbol = nullptr; // We'll fill this field later.
102      R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl());
103      R.Scattered = MachOObj.isRelocationScattered(R.Info);
104      unsigned Type = MachOObj.getAnyRelocationType(R.Info);
105      // TODO Support CPU_TYPE_ARM.
106      R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 &&
107                                    Type == MachO::ARM64_RELOC_ADDEND);
108      R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(R.Info);
109      S.Relocations.push_back(R);
110    }
111
112    assert(S.NReloc == S.Relocations.size() &&
113           "Incorrect number of relocations");
114  }
115  return std::move(Sections);
116}
117
118Error MachOReader::readLoadCommands(Object &O) const {
119  // For MachO sections indices start from 1.
120  uint32_t NextSectionIndex = 1;
121  static constexpr char TextSegmentName[] = "__TEXT";
122  for (auto LoadCmd : MachOObj.load_commands()) {
123    LoadCommand LC;
124    switch (LoadCmd.C.cmd) {
125    case MachO::LC_CODE_SIGNATURE:
126      O.CodeSignatureCommandIndex = O.LoadCommands.size();
127      break;
128    case MachO::LC_SEGMENT:
129      // LoadCmd.Ptr might not be aligned temporarily as
130      // MachO::segment_command requires, but the segname char pointer do not
131      // have alignment restrictions.
132      if (StringRef(reinterpret_cast<const char *>(
133              LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) ==
134          TextSegmentName)
135        O.TextSegmentCommandIndex = O.LoadCommands.size();
136
137      if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
138              extractSections<MachO::section, MachO::segment_command>(
139                  LoadCmd, MachOObj, NextSectionIndex))
140        LC.Sections = std::move(*Sections);
141      else
142        return Sections.takeError();
143      break;
144    case MachO::LC_SEGMENT_64:
145      // LoadCmd.Ptr might not be aligned temporarily as
146      // MachO::segment_command_64 requires, but the segname char pointer do
147      // not have alignment restrictions.
148      if (StringRef(reinterpret_cast<const char *>(
149              LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) ==
150          TextSegmentName)
151        O.TextSegmentCommandIndex = O.LoadCommands.size();
152
153      if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
154              extractSections<MachO::section_64, MachO::segment_command_64>(
155                  LoadCmd, MachOObj, NextSectionIndex))
156        LC.Sections = std::move(*Sections);
157      else
158        return Sections.takeError();
159      break;
160    case MachO::LC_SYMTAB:
161      O.SymTabCommandIndex = O.LoadCommands.size();
162      break;
163    case MachO::LC_DYSYMTAB:
164      O.DySymTabCommandIndex = O.LoadCommands.size();
165      break;
166    case MachO::LC_DYLD_INFO:
167    case MachO::LC_DYLD_INFO_ONLY:
168      O.DyLdInfoCommandIndex = O.LoadCommands.size();
169      break;
170    case MachO::LC_DATA_IN_CODE:
171      O.DataInCodeCommandIndex = O.LoadCommands.size();
172      break;
173    case MachO::LC_LINKER_OPTIMIZATION_HINT:
174      O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size();
175      break;
176    case MachO::LC_FUNCTION_STARTS:
177      O.FunctionStartsCommandIndex = O.LoadCommands.size();
178      break;
179    case MachO::LC_DYLIB_CODE_SIGN_DRS:
180      O.DylibCodeSignDRsIndex = O.LoadCommands.size();
181      break;
182    case MachO::LC_DYLD_EXPORTS_TRIE:
183      O.ExportsTrieCommandIndex = O.LoadCommands.size();
184      break;
185    case MachO::LC_DYLD_CHAINED_FIXUPS:
186      O.ChainedFixupsCommandIndex = O.LoadCommands.size();
187      break;
188    }
189#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
190  case MachO::LCName:                                                          \
191    memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr,        \
192           sizeof(MachO::LCStruct));                                           \
193    if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)                  \
194      MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data);                  \
195    if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct))                           \
196      LC.Payload = ArrayRef<uint8_t>(                                          \
197          reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +       \
198              sizeof(MachO::LCStruct),                                         \
199          LoadCmd.C.cmdsize - sizeof(MachO::LCStruct));                        \
200    break;
201
202    switch (LoadCmd.C.cmd) {
203    default:
204      memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr,
205             sizeof(MachO::load_command));
206      if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
207        MachO::swapStruct(LC.MachOLoadCommand.load_command_data);
208      if (LoadCmd.C.cmdsize > sizeof(MachO::load_command))
209        LC.Payload = ArrayRef<uint8_t>(
210            reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +
211                sizeof(MachO::load_command),
212            LoadCmd.C.cmdsize - sizeof(MachO::load_command));
213      break;
214#include "llvm/BinaryFormat/MachO.def"
215    }
216    O.LoadCommands.push_back(std::move(LC));
217  }
218  return Error::success();
219}
220
221template <typename nlist_t>
222SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) {
223  assert(nlist.n_strx < StrTable.size() &&
224         "n_strx exceeds the size of the string table");
225  SymbolEntry SE;
226  SE.Name = StringRef(StrTable.data() + nlist.n_strx).str();
227  SE.n_type = nlist.n_type;
228  SE.n_sect = nlist.n_sect;
229  SE.n_desc = nlist.n_desc;
230  SE.n_value = nlist.n_value;
231  return SE;
232}
233
234void MachOReader::readSymbolTable(Object &O) const {
235  StringRef StrTable = MachOObj.getStringTableData();
236  for (auto Symbol : MachOObj.symbols()) {
237    SymbolEntry SE =
238        (MachOObj.is64Bit()
239             ? constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry(
240                                                  Symbol.getRawDataRefImpl()))
241             : constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry(
242                                                  Symbol.getRawDataRefImpl())));
243
244    O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE));
245  }
246}
247
248void MachOReader::setSymbolInRelocationInfo(Object &O) const {
249  std::vector<const Section *> Sections;
250  for (auto &LC : O.LoadCommands)
251    for (std::unique_ptr<Section> &Sec : LC.Sections)
252      Sections.push_back(Sec.get());
253
254  for (LoadCommand &LC : O.LoadCommands)
255    for (std::unique_ptr<Section> &Sec : LC.Sections)
256      for (auto &Reloc : Sec->Relocations)
257        if (!Reloc.Scattered && !Reloc.IsAddend) {
258          const uint32_t SymbolNum =
259              Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian());
260          if (Reloc.Extern) {
261            Reloc.Symbol = O.SymTable.getSymbolByIndex(SymbolNum);
262          } else {
263            // FIXME: Refactor error handling in MachOReader and report an error
264            // if we encounter an invalid relocation.
265            assert(SymbolNum >= 1 && SymbolNum <= Sections.size() &&
266                   "Invalid section index.");
267            Reloc.Sec = Sections[SymbolNum - 1];
268          }
269        }
270}
271
272void MachOReader::readRebaseInfo(Object &O) const {
273  O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes();
274}
275
276void MachOReader::readBindInfo(Object &O) const {
277  O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes();
278}
279
280void MachOReader::readWeakBindInfo(Object &O) const {
281  O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes();
282}
283
284void MachOReader::readLazyBindInfo(Object &O) const {
285  O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes();
286}
287
288void MachOReader::readExportInfo(Object &O) const {
289  // This information can be in LC_DYLD_INFO or in LC_DYLD_EXPORTS_TRIE
290  ArrayRef<uint8_t> Trie = MachOObj.getDyldInfoExportsTrie();
291  if (Trie.empty())
292    Trie = MachOObj.getDyldExportsTrie();
293  O.Exports.Trie = Trie;
294}
295
296void MachOReader::readLinkData(Object &O, std::optional<size_t> LCIndex,
297                               LinkData &LD) const {
298  if (!LCIndex)
299    return;
300  const MachO::linkedit_data_command &LC =
301      O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data;
302  LD.Data =
303      arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize));
304}
305
306void MachOReader::readDataInCodeData(Object &O) const {
307  return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode);
308}
309
310void MachOReader::readLinkerOptimizationHint(Object &O) const {
311  return readLinkData(O, O.LinkerOptimizationHintCommandIndex,
312                      O.LinkerOptimizationHint);
313}
314
315void MachOReader::readFunctionStartsData(Object &O) const {
316  return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts);
317}
318
319void MachOReader::readDylibCodeSignDRs(Object &O) const {
320  return readLinkData(O, O.DylibCodeSignDRsIndex, O.DylibCodeSignDRs);
321}
322
323void MachOReader::readExportsTrie(Object &O) const {
324  return readLinkData(O, O.ExportsTrieCommandIndex, O.ExportsTrie);
325}
326
327void MachOReader::readChainedFixups(Object &O) const {
328  return readLinkData(O, O.ChainedFixupsCommandIndex, O.ChainedFixups);
329}
330
331void MachOReader::readIndirectSymbolTable(Object &O) const {
332  MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand();
333  constexpr uint32_t AbsOrLocalMask =
334      MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS;
335  for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) {
336    uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i);
337    if ((Index & AbsOrLocalMask) != 0)
338      O.IndirectSymTable.Symbols.emplace_back(Index, std::nullopt);
339    else
340      O.IndirectSymTable.Symbols.emplace_back(
341          Index, O.SymTable.getSymbolByIndex(Index));
342  }
343}
344
345void MachOReader::readSwiftVersion(Object &O) const {
346  struct ObjCImageInfo {
347    uint32_t Version;
348    uint32_t Flags;
349  } ImageInfo;
350
351  for (const LoadCommand &LC : O.LoadCommands)
352    for (const std::unique_ptr<Section> &Sec : LC.Sections)
353      if (Sec->Sectname == "__objc_imageinfo" &&
354          (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" ||
355           Sec->Segname == "__DATA_DIRTY") &&
356          Sec->Content.size() >= sizeof(ObjCImageInfo)) {
357        memcpy(&ImageInfo, Sec->Content.data(), sizeof(ObjCImageInfo));
358        if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) {
359          sys::swapByteOrder(ImageInfo.Version);
360          sys::swapByteOrder(ImageInfo.Flags);
361        }
362        O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff;
363        return;
364      }
365}
366
367Expected<std::unique_ptr<Object>> MachOReader::create() const {
368  auto Obj = std::make_unique<Object>();
369  readHeader(*Obj);
370  if (Error E = readLoadCommands(*Obj))
371    return std::move(E);
372  readSymbolTable(*Obj);
373  setSymbolInRelocationInfo(*Obj);
374  readRebaseInfo(*Obj);
375  readBindInfo(*Obj);
376  readWeakBindInfo(*Obj);
377  readLazyBindInfo(*Obj);
378  readExportInfo(*Obj);
379  readDataInCodeData(*Obj);
380  readLinkerOptimizationHint(*Obj);
381  readFunctionStartsData(*Obj);
382  readDylibCodeSignDRs(*Obj);
383  readExportsTrie(*Obj);
384  readChainedFixups(*Obj);
385  readIndirectSymbolTable(*Obj);
386  readSwiftVersion(*Obj);
387  return std::move(Obj);
388}
389