1//===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "MachOLayoutBuilder.h"
10#include "llvm/Support/Alignment.h"
11#include "llvm/Support/Errc.h"
12#include "llvm/Support/ErrorHandling.h"
13#include "llvm/Support/SystemZ/zOSSupport.h"
14
15using namespace llvm;
16using namespace llvm::objcopy::macho;
17
18StringTableBuilder::Kind
19MachOLayoutBuilder::getStringTableBuilderKind(const Object &O, bool Is64Bit) {
20  if (O.Header.FileType == MachO::HeaderFileType::MH_OBJECT)
21    return Is64Bit ? StringTableBuilder::MachO64 : StringTableBuilder::MachO;
22  return Is64Bit ? StringTableBuilder::MachO64Linked
23                 : StringTableBuilder::MachOLinked;
24}
25
26uint32_t MachOLayoutBuilder::computeSizeOfCmds() const {
27  uint32_t Size = 0;
28  for (const LoadCommand &LC : O.LoadCommands) {
29    const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
30    auto cmd = MLC.load_command_data.cmd;
31    switch (cmd) {
32    case MachO::LC_SEGMENT:
33      Size += sizeof(MachO::segment_command) +
34              sizeof(MachO::section) * LC.Sections.size();
35      continue;
36    case MachO::LC_SEGMENT_64:
37      Size += sizeof(MachO::segment_command_64) +
38              sizeof(MachO::section_64) * LC.Sections.size();
39      continue;
40    }
41
42    switch (cmd) {
43#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
44  case MachO::LCName:                                                          \
45    Size += sizeof(MachO::LCStruct) + LC.Payload.size();                       \
46    break;
47#include "llvm/BinaryFormat/MachO.def"
48#undef HANDLE_LOAD_COMMAND
49    }
50  }
51
52  return Size;
53}
54
55void MachOLayoutBuilder::constructStringTable() {
56  for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
57    StrTableBuilder.add(Sym->Name);
58  StrTableBuilder.finalize();
59}
60
61void MachOLayoutBuilder::updateSymbolIndexes() {
62  uint32_t Index = 0;
63  for (auto &Symbol : O.SymTable.Symbols)
64    Symbol->Index = Index++;
65}
66
67// Updates the index and the number of local/external/undefined symbols.
68void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) {
69  assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB);
70  // Make sure that nlist entries in the symbol table are sorted by the those
71  // types. The order is: local < defined external < undefined external.
72  assert(llvm::is_sorted(O.SymTable.Symbols,
73                         [](const std::unique_ptr<SymbolEntry> &A,
74                            const std::unique_ptr<SymbolEntry> &B) {
75                           bool AL = A->isLocalSymbol(),
76                                BL = B->isLocalSymbol();
77                           if (AL != BL)
78                             return AL;
79                           return !AL && !A->isUndefinedSymbol() &&
80                                  B->isUndefinedSymbol();
81                         }) &&
82         "Symbols are not sorted by their types.");
83
84  uint32_t NumLocalSymbols = 0;
85  auto Iter = O.SymTable.Symbols.begin();
86  auto End = O.SymTable.Symbols.end();
87  for (; Iter != End; ++Iter) {
88    if ((*Iter)->isExternalSymbol())
89      break;
90
91    ++NumLocalSymbols;
92  }
93
94  uint32_t NumExtDefSymbols = 0;
95  for (; Iter != End; ++Iter) {
96    if ((*Iter)->isUndefinedSymbol())
97      break;
98
99    ++NumExtDefSymbols;
100  }
101
102  MLC.dysymtab_command_data.ilocalsym = 0;
103  MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
104  MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
105  MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
106  MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
107  MLC.dysymtab_command_data.nundefsym =
108      O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
109}
110
111// Recomputes and updates offset and size fields in load commands and sections
112// since they could be modified.
113uint64_t MachOLayoutBuilder::layoutSegments() {
114  auto HeaderSize =
115      Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
116  const bool IsObjectFile =
117      O.Header.FileType == MachO::HeaderFileType::MH_OBJECT;
118  uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0;
119  for (LoadCommand &LC : O.LoadCommands) {
120    auto &MLC = LC.MachOLoadCommand;
121    StringRef Segname;
122    uint64_t SegmentVmAddr;
123    uint64_t SegmentVmSize;
124    switch (MLC.load_command_data.cmd) {
125    case MachO::LC_SEGMENT:
126      SegmentVmAddr = MLC.segment_command_data.vmaddr;
127      SegmentVmSize = MLC.segment_command_data.vmsize;
128      Segname = StringRef(MLC.segment_command_data.segname,
129                          strnlen(MLC.segment_command_data.segname,
130                                  sizeof(MLC.segment_command_data.segname)));
131      break;
132    case MachO::LC_SEGMENT_64:
133      SegmentVmAddr = MLC.segment_command_64_data.vmaddr;
134      SegmentVmSize = MLC.segment_command_64_data.vmsize;
135      Segname = StringRef(MLC.segment_command_64_data.segname,
136                          strnlen(MLC.segment_command_64_data.segname,
137                                  sizeof(MLC.segment_command_64_data.segname)));
138      break;
139    default:
140      continue;
141    }
142
143    if (Segname == "__LINKEDIT") {
144      // We update the __LINKEDIT segment later (in layoutTail).
145      assert(LC.Sections.empty() && "__LINKEDIT segment has sections");
146      LinkEditLoadCommand = &MLC;
147      continue;
148    }
149
150    // Update file offsets and sizes of sections.
151    uint64_t SegOffset = Offset;
152    uint64_t SegFileSize = 0;
153    uint64_t VMSize = 0;
154    for (std::unique_ptr<Section> &Sec : LC.Sections) {
155      assert(SegmentVmAddr <= Sec->Addr &&
156             "Section's address cannot be smaller than Segment's one");
157      uint32_t SectOffset = Sec->Addr - SegmentVmAddr;
158      if (IsObjectFile) {
159        if (!Sec->hasValidOffset()) {
160          Sec->Offset = 0;
161        } else {
162          uint64_t PaddingSize =
163              offsetToAlignment(SegFileSize, Align(1ull << Sec->Align));
164          Sec->Offset = SegOffset + SegFileSize + PaddingSize;
165          Sec->Size = Sec->Content.size();
166          SegFileSize += PaddingSize + Sec->Size;
167        }
168      } else {
169        if (!Sec->hasValidOffset()) {
170          Sec->Offset = 0;
171        } else {
172          Sec->Offset = SegOffset + SectOffset;
173          Sec->Size = Sec->Content.size();
174          SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size);
175        }
176      }
177      VMSize = std::max(VMSize, SectOffset + Sec->Size);
178    }
179
180    if (IsObjectFile) {
181      Offset += SegFileSize;
182    } else {
183      Offset = alignTo(Offset + SegFileSize, PageSize);
184      SegFileSize = alignTo(SegFileSize, PageSize);
185      // Use the original vmsize if the segment is __PAGEZERO.
186      VMSize =
187          Segname == "__PAGEZERO" ? SegmentVmSize : alignTo(VMSize, PageSize);
188    }
189
190    switch (MLC.load_command_data.cmd) {
191    case MachO::LC_SEGMENT:
192      MLC.segment_command_data.cmdsize =
193          sizeof(MachO::segment_command) +
194          sizeof(MachO::section) * LC.Sections.size();
195      MLC.segment_command_data.nsects = LC.Sections.size();
196      MLC.segment_command_data.fileoff = SegOffset;
197      MLC.segment_command_data.vmsize = VMSize;
198      MLC.segment_command_data.filesize = SegFileSize;
199      break;
200    case MachO::LC_SEGMENT_64:
201      MLC.segment_command_64_data.cmdsize =
202          sizeof(MachO::segment_command_64) +
203          sizeof(MachO::section_64) * LC.Sections.size();
204      MLC.segment_command_64_data.nsects = LC.Sections.size();
205      MLC.segment_command_64_data.fileoff = SegOffset;
206      MLC.segment_command_64_data.vmsize = VMSize;
207      MLC.segment_command_64_data.filesize = SegFileSize;
208      break;
209    }
210  }
211
212  return Offset;
213}
214
215uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) {
216  for (LoadCommand &LC : O.LoadCommands)
217    for (std::unique_ptr<Section> &Sec : LC.Sections) {
218      Sec->RelOff = Sec->Relocations.empty() ? 0 : Offset;
219      Sec->NReloc = Sec->Relocations.size();
220      Offset += sizeof(MachO::any_relocation_info) * Sec->NReloc;
221    }
222
223  return Offset;
224}
225
// Determines the layout of the contents of the __LINKEDIT segment, starting
// at Offset, and patches every load command that refers to that data.
//
// Offset is the file offset at which the tail data should start; it is raised
// to the end of the header plus load commands if it is smaller than that.
//
// Returns an error if an LC_DYSYMTAB command has a non-zero ntoc, nmodtab,
// nextrefsyms, nlocrel or nextrel, or if a load command that is not handled
// below is encountered; otherwise returns success.
Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
  // If we are building the layout of an executable or dynamic library
  // which does not have any segments other than __LINKEDIT,
  // the Offset can be equal to zero by this time. It happens because of the
  // convention that in such cases the file offsets specified by LC_SEGMENT
  // start with zero (unlike the case of a relocatable object file).
  const uint64_t HeaderSize =
      Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
  assert((!(O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) ||
          Offset >= HeaderSize + O.Header.SizeOfCmds) &&
         "Incorrect tail offset");
  Offset = std::max(Offset, HeaderSize + O.Header.SizeOfCmds);

  // The exports trie can be in either LC_DYLD_INFO or in
  // LC_DYLD_EXPORTS_TRIE, but not both.
  size_t DyldInfoExportsTrieSize = 0;
  size_t DyldExportsTrieSize = 0;
  for (const auto &LC : O.LoadCommands) {
    switch (LC.MachOLoadCommand.load_command_data.cmd) {
    case MachO::LC_DYLD_INFO:
    case MachO::LC_DYLD_INFO_ONLY:
      DyldInfoExportsTrieSize = O.Exports.Trie.size();
      break;
    case MachO::LC_DYLD_EXPORTS_TRIE:
      DyldExportsTrieSize = O.Exports.Trie.size();
      break;
    default:
      break;
    }
  }
  assert((DyldInfoExportsTrieSize == 0 || DyldExportsTrieSize == 0) &&
         "Export trie in both LCs");

  uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
  uint64_t StartOfLinkEdit = Offset;

  // The order of LINKEDIT elements is as follows:
  // rebase info, binding info, weak binding info, lazy binding info, export
  // trie, chained fixups, dyld exports trie, function starts, data-in-code,
  // linker optimization hints, symbol table, indirect symbol table, symbol
  // table strings, dylib codesign drs, and code signature.
  //
  // updateOffset reserves Size bytes at the current Offset and returns the
  // offset at which the reserved region starts.
  auto updateOffset = [&Offset](size_t Size) {
    uint64_t PreviousOffset = Offset;
    Offset += Size;
    return PreviousOffset;
  };

  uint64_t StartOfRebaseInfo = updateOffset(O.Rebases.Opcodes.size());
  uint64_t StartOfBindingInfo = updateOffset(O.Binds.Opcodes.size());
  uint64_t StartOfWeakBindingInfo = updateOffset(O.WeakBinds.Opcodes.size());
  uint64_t StartOfLazyBindingInfo = updateOffset(O.LazyBinds.Opcodes.size());
  uint64_t StartOfExportTrie = updateOffset(DyldInfoExportsTrieSize);
  uint64_t StartOfChainedFixups = updateOffset(O.ChainedFixups.Data.size());
  uint64_t StartOfDyldExportsTrie = updateOffset(DyldExportsTrieSize);
  uint64_t StartOfFunctionStarts = updateOffset(O.FunctionStarts.Data.size());
  uint64_t StartOfDataInCode = updateOffset(O.DataInCode.Data.size());
  uint64_t StartOfLinkerOptimizationHint =
      updateOffset(O.LinkerOptimizationHint.Data.size());
  uint64_t StartOfSymbols = updateOffset(NListSize * O.SymTable.Symbols.size());
  uint64_t StartOfIndirectSymbols =
      updateOffset(sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
  uint64_t StartOfSymbolStrings = updateOffset(StrTableBuilder.getSize());
  uint64_t StartOfDylibCodeSignDRs = updateOffset(O.DylibCodeSignDRs.Data.size());

  // The code signature, if the object has one, goes last and starts on a
  // 16-byte boundary.
  uint64_t StartOfCodeSignature = Offset;
  uint32_t CodeSignatureSize = 0;
  if (O.CodeSignatureCommandIndex) {
    StartOfCodeSignature = alignTo(StartOfCodeSignature, 16);

    // Note: These calculations are to be kept in sync with the same
    // calculations performed in LLD's CodeSignatureSection.
    const uint32_t AllHeadersSize =
        alignTo(CodeSignature.FixedHeadersSize + OutputFileName.size() + 1,
                CodeSignature.Align);
    // Number of BlockSize-sized blocks needed to cover everything that
    // precedes the signature (rounded up).
    const uint32_t BlockCount =
        (StartOfCodeSignature + CodeSignature.BlockSize - 1) /
        CodeSignature.BlockSize;
    const uint32_t Size =
        alignTo(AllHeadersSize + BlockCount * CodeSignature.HashSize,
                CodeSignature.Align);

    CodeSignature.StartOffset = StartOfCodeSignature;
    CodeSignature.AllHeadersSize = AllHeadersSize;
    CodeSignature.BlockCount = BlockCount;
    CodeSignature.OutputFileName = OutputFileName;
    CodeSignature.Size = Size;
    CodeSignatureSize = Size;
  }
  uint64_t LinkEditSize =
      StartOfCodeSignature + CodeSignatureSize - StartOfLinkEdit;

  // Now we have determined the layout of the contents of the __LINKEDIT
  // segment. Update its load command.
  if (LinkEditLoadCommand) {
    MachO::macho_load_command *MLC = LinkEditLoadCommand;
    switch (LinkEditLoadCommand->load_command_data.cmd) {
    case MachO::LC_SEGMENT:
      MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command);
      MLC->segment_command_data.fileoff = StartOfLinkEdit;
      MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize);
      MLC->segment_command_data.filesize = LinkEditSize;
      break;
    case MachO::LC_SEGMENT_64:
      MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64);
      MLC->segment_command_64_data.fileoff = StartOfLinkEdit;
      MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize);
      MLC->segment_command_64_data.filesize = LinkEditSize;
      break;
    }
  }

  // Point every load command that references tail data at the offsets and
  // sizes computed above.
  for (LoadCommand &LC : O.LoadCommands) {
    auto &MLC = LC.MachOLoadCommand;
    auto cmd = MLC.load_command_data.cmd;
    switch (cmd) {
    case MachO::LC_CODE_SIGNATURE:
      MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature;
      MLC.linkedit_data_command_data.datasize = CodeSignatureSize;
      break;
    case MachO::LC_DYLIB_CODE_SIGN_DRS:
      MLC.linkedit_data_command_data.dataoff = StartOfDylibCodeSignDRs;
      MLC.linkedit_data_command_data.datasize = O.DylibCodeSignDRs.Data.size();
      break;
    case MachO::LC_SYMTAB:
      MLC.symtab_command_data.symoff = StartOfSymbols;
      MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
      MLC.symtab_command_data.stroff = StartOfSymbolStrings;
      MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
      break;
    case MachO::LC_DYSYMTAB: {
      // These tables are not modelled, so refuse to continue rather than
      // emit a command whose counts no longer match the data.
      if (MLC.dysymtab_command_data.ntoc != 0 ||
          MLC.dysymtab_command_data.nmodtab != 0 ||
          MLC.dysymtab_command_data.nextrefsyms != 0 ||
          MLC.dysymtab_command_data.nlocrel != 0 ||
          MLC.dysymtab_command_data.nextrel != 0)
        return createStringError(llvm::errc::not_supported,
                                 "shared library is not yet supported");

      // The indirect symbol fields are only rewritten when there are
      // indirect symbols; otherwise they keep their current values.
      if (!O.IndirectSymTable.Symbols.empty()) {
        MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols;
        MLC.dysymtab_command_data.nindirectsyms =
            O.IndirectSymTable.Symbols.size();
      }

      updateDySymTab(MLC);
      break;
    }
    case MachO::LC_DATA_IN_CODE:
      MLC.linkedit_data_command_data.dataoff = StartOfDataInCode;
      MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size();
      break;
    case MachO::LC_LINKER_OPTIMIZATION_HINT:
      MLC.linkedit_data_command_data.dataoff = StartOfLinkerOptimizationHint;
      MLC.linkedit_data_command_data.datasize =
          O.LinkerOptimizationHint.Data.size();
      break;
    case MachO::LC_FUNCTION_STARTS:
      MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts;
      MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size();
      break;
    case MachO::LC_DYLD_CHAINED_FIXUPS:
      MLC.linkedit_data_command_data.dataoff = StartOfChainedFixups;
      MLC.linkedit_data_command_data.datasize = O.ChainedFixups.Data.size();
      break;
    case MachO::LC_DYLD_EXPORTS_TRIE:
      MLC.linkedit_data_command_data.dataoff = StartOfDyldExportsTrie;
      MLC.linkedit_data_command_data.datasize = DyldExportsTrieSize;
      break;
    case MachO::LC_DYLD_INFO:
    case MachO::LC_DYLD_INFO_ONLY:
      // An empty table is recorded with offset zero rather than with the
      // position it would have occupied.
      MLC.dyld_info_command_data.rebase_off =
          O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo;
      MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size();
      MLC.dyld_info_command_data.bind_off =
          O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo;
      MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size();
      MLC.dyld_info_command_data.weak_bind_off =
          O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo;
      MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size();
      MLC.dyld_info_command_data.lazy_bind_off =
          O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo;
      MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size();
      MLC.dyld_info_command_data.export_off =
          O.Exports.Trie.empty() ? 0 : StartOfExportTrie;
      MLC.dyld_info_command_data.export_size = DyldInfoExportsTrieSize;
      break;
    // Note that LC_ENCRYPTION_INFO.cryptoff despite its name and the comment in
    // <mach-o/loader.h> is not an offset in the binary file, instead, it is a
    // relative virtual address. At the moment modification of the __TEXT
    // segment of executables isn't supported anyway (e.g. data in code entries
    // are not recalculated). Moreover, in general
    // LC_ENCRYPTION_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because
    // without making additional assumptions (e.g. that the entire __TEXT
    // segment should be encrypted) we do not know how to recalculate the
    // boundaries of the encrypted part. For now just copy over these load
    // commands until we encounter a real world usecase where
    // LC_ENCRYPTION_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted.
    case MachO::LC_ENCRYPTION_INFO:
    case MachO::LC_ENCRYPTION_INFO_64:
    case MachO::LC_LOAD_DYLINKER:
    case MachO::LC_MAIN:
    case MachO::LC_RPATH:
    case MachO::LC_SEGMENT:
    case MachO::LC_SEGMENT_64:
    case MachO::LC_VERSION_MIN_MACOSX:
    case MachO::LC_VERSION_MIN_IPHONEOS:
    case MachO::LC_VERSION_MIN_TVOS:
    case MachO::LC_VERSION_MIN_WATCHOS:
    case MachO::LC_BUILD_VERSION:
    case MachO::LC_ID_DYLIB:
    case MachO::LC_LOAD_DYLIB:
    case MachO::LC_LOAD_WEAK_DYLIB:
    case MachO::LC_UUID:
    case MachO::LC_SOURCE_VERSION:
    case MachO::LC_THREAD:
    case MachO::LC_UNIXTHREAD:
    case MachO::LC_SUB_FRAMEWORK:
    case MachO::LC_SUB_UMBRELLA:
    case MachO::LC_SUB_CLIENT:
    case MachO::LC_SUB_LIBRARY:
    case MachO::LC_LINKER_OPTION:
      // Nothing to update.
      break;
    default:
      // Abort if it's unsupported in order to prevent corrupting the object.
      return createStringError(llvm::errc::not_supported,
                               "unsupported load command (cmd=0x%x)", cmd);
    }
  }

  return Error::success();
}
458
459Error MachOLayoutBuilder::layout() {
460  O.Header.NCmds = O.LoadCommands.size();
461  O.Header.SizeOfCmds = computeSizeOfCmds();
462  constructStringTable();
463  updateSymbolIndexes();
464  uint64_t Offset = layoutSegments();
465  Offset = layoutRelocations(Offset);
466  return layoutTail(Offset);
467}
468