1//===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "MachOObject.h"
10#include "llvm/ADT/SmallPtrSet.h"
11#include <unordered_set>
12
13using namespace llvm;
14using namespace llvm::objcopy::macho;
15
16Section::Section(StringRef SegName, StringRef SectName)
17    : Segname(SegName), Sectname(SectName),
18      CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {}
19
20Section::Section(StringRef SegName, StringRef SectName, StringRef Content)
21    : Segname(SegName), Sectname(SectName),
22      CanonicalName((Twine(SegName) + Twine(',') + SectName).str()),
23      Content(Content) {}
24
25const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const {
26  assert(Index < Symbols.size() && "invalid symbol index");
27  return Symbols[Index].get();
28}
29
30SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) {
31  return const_cast<SymbolEntry *>(
32      static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index));
33}
34
35void SymbolTable::removeSymbols(
36    function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) {
37  llvm::erase_if(Symbols, ToRemove);
38}
39
40void Object::updateLoadCommandIndexes() {
41  static constexpr char TextSegmentName[] = "__TEXT";
42  // Update indices of special load commands
43  for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) {
44    LoadCommand &LC = LoadCommands[Index];
45    switch (LC.MachOLoadCommand.load_command_data.cmd) {
46    case MachO::LC_CODE_SIGNATURE:
47      CodeSignatureCommandIndex = Index;
48      break;
49    case MachO::LC_SEGMENT:
50      if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) ==
51          TextSegmentName)
52        TextSegmentCommandIndex = Index;
53      break;
54    case MachO::LC_SEGMENT_64:
55      if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) ==
56          TextSegmentName)
57        TextSegmentCommandIndex = Index;
58      break;
59    case MachO::LC_SYMTAB:
60      SymTabCommandIndex = Index;
61      break;
62    case MachO::LC_DYSYMTAB:
63      DySymTabCommandIndex = Index;
64      break;
65    case MachO::LC_DYLD_INFO:
66    case MachO::LC_DYLD_INFO_ONLY:
67      DyLdInfoCommandIndex = Index;
68      break;
69    case MachO::LC_DATA_IN_CODE:
70      DataInCodeCommandIndex = Index;
71      break;
72    case MachO::LC_LINKER_OPTIMIZATION_HINT:
73      LinkerOptimizationHintCommandIndex = Index;
74      break;
75    case MachO::LC_FUNCTION_STARTS:
76      FunctionStartsCommandIndex = Index;
77      break;
78    case MachO::LC_DYLIB_CODE_SIGN_DRS:
79      DylibCodeSignDRsIndex = Index;
80      break;
81    case MachO::LC_DYLD_CHAINED_FIXUPS:
82      ChainedFixupsCommandIndex = Index;
83      break;
84    case MachO::LC_DYLD_EXPORTS_TRIE:
85      ExportsTrieCommandIndex = Index;
86      break;
87    }
88  }
89}
90
91Error Object::removeLoadCommands(
92    function_ref<bool(const LoadCommand &)> ToRemove) {
93  auto It = std::stable_partition(
94      LoadCommands.begin(), LoadCommands.end(),
95      [&](const LoadCommand &LC) { return !ToRemove(LC); });
96  LoadCommands.erase(It, LoadCommands.end());
97
98  updateLoadCommandIndexes();
99  return Error::success();
100}
101
102Error Object::removeSections(
103    function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
104  DenseMap<uint32_t, const Section *> OldIndexToSection;
105  uint32_t NextSectionIndex = 1;
106  for (LoadCommand &LC : LoadCommands) {
107    auto It = std::stable_partition(
108        std::begin(LC.Sections), std::end(LC.Sections),
109        [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); });
110    for (auto I = LC.Sections.begin(), End = It; I != End; ++I) {
111      OldIndexToSection[(*I)->Index] = I->get();
112      (*I)->Index = NextSectionIndex++;
113    }
114    LC.Sections.erase(It, LC.Sections.end());
115  }
116
117  auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool {
118    std::optional<uint32_t> Section = S->section();
119    return (Section && !OldIndexToSection.count(*Section));
120  };
121
122  SmallPtrSet<const SymbolEntry *, 2> DeadSymbols;
123  for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols)
124    if (IsDead(Sym))
125      DeadSymbols.insert(Sym.get());
126
127  for (const LoadCommand &LC : LoadCommands)
128    for (const std::unique_ptr<Section> &Sec : LC.Sections)
129      for (const RelocationInfo &R : Sec->Relocations)
130        if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol))
131          return createStringError(std::errc::invalid_argument,
132                                   "symbol '%s' defined in section with index "
133                                   "'%u' cannot be removed because it is "
134                                   "referenced by a relocation in section '%s'",
135                                   (*R.Symbol)->Name.c_str(),
136                                   *((*R.Symbol)->section()),
137                                   Sec->CanonicalName.c_str());
138  SymTable.removeSymbols(IsDead);
139  for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols)
140    if (S->section())
141      S->n_sect = OldIndexToSection[S->n_sect]->Index;
142  return Error::success();
143}
144
145uint64_t Object::nextAvailableSegmentAddress() const {
146  uint64_t HeaderSize =
147      is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
148  uint64_t Addr = HeaderSize + Header.SizeOfCmds;
149  for (const LoadCommand &LC : LoadCommands) {
150    const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
151    switch (MLC.load_command_data.cmd) {
152    case MachO::LC_SEGMENT:
153      Addr = std::max(Addr,
154                      static_cast<uint64_t>(MLC.segment_command_data.vmaddr) +
155                          MLC.segment_command_data.vmsize);
156      break;
157    case MachO::LC_SEGMENT_64:
158      Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr +
159                                MLC.segment_command_64_data.vmsize);
160      break;
161    default:
162      continue;
163    }
164  }
165  return Addr;
166}
167
168template <typename SegmentType>
169static void
170constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType,
171                 StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) {
172  assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name");
173  memset(&Seg, 0, sizeof(SegmentType));
174  Seg.cmd = CmdType;
175  strncpy(Seg.segname, SegName.data(), SegName.size());
176  Seg.maxprot |=
177      (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
178  Seg.initprot |=
179      (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
180  Seg.vmaddr = SegVMAddr;
181  Seg.vmsize = SegVMSize;
182}
183
184LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) {
185  LoadCommand LC;
186  const uint64_t SegVMAddr = nextAvailableSegmentAddress();
187  if (is64Bit())
188    constructSegment(LC.MachOLoadCommand.segment_command_64_data,
189                     MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize);
190  else
191    constructSegment(LC.MachOLoadCommand.segment_command_data,
192                     MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize);
193
194  LoadCommands.push_back(std::move(LC));
195  return LoadCommands.back();
196}
197
198/// Extracts a segment name from a string which is possibly non-null-terminated.
199static StringRef extractSegmentName(const char *SegName) {
200  return StringRef(SegName,
201                   strnlen(SegName, sizeof(MachO::segment_command::segname)));
202}
203
204std::optional<StringRef> LoadCommand::getSegmentName() const {
205  const MachO::macho_load_command &MLC = MachOLoadCommand;
206  switch (MLC.load_command_data.cmd) {
207  case MachO::LC_SEGMENT:
208    return extractSegmentName(MLC.segment_command_data.segname);
209  case MachO::LC_SEGMENT_64:
210    return extractSegmentName(MLC.segment_command_64_data.segname);
211  default:
212    return std::nullopt;
213  }
214}
215
216std::optional<uint64_t> LoadCommand::getSegmentVMAddr() const {
217  const MachO::macho_load_command &MLC = MachOLoadCommand;
218  switch (MLC.load_command_data.cmd) {
219  case MachO::LC_SEGMENT:
220    return MLC.segment_command_data.vmaddr;
221  case MachO::LC_SEGMENT_64:
222    return MLC.segment_command_64_data.vmaddr;
223  default:
224    return std::nullopt;
225  }
226}
227