1//===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "MachOObject.h"
10#include "llvm/ADT/SmallPtrSet.h"
11#include "llvm/Support/SystemZ/zOSSupport.h"
12#include <unordered_set>
13
14using namespace llvm;
15using namespace llvm::objcopy::macho;
16
17Section::Section(StringRef SegName, StringRef SectName)
18    : Segname(SegName), Sectname(SectName),
19      CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {}
20
21Section::Section(StringRef SegName, StringRef SectName, StringRef Content)
22    : Segname(SegName), Sectname(SectName),
23      CanonicalName((Twine(SegName) + Twine(',') + SectName).str()),
24      Content(Content) {}
25
26const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const {
27  assert(Index < Symbols.size() && "invalid symbol index");
28  return Symbols[Index].get();
29}
30
31SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) {
32  return const_cast<SymbolEntry *>(
33      static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index));
34}
35
36void SymbolTable::removeSymbols(
37    function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) {
38  llvm::erase_if(Symbols, ToRemove);
39}
40
41void Object::updateLoadCommandIndexes() {
42  static constexpr char TextSegmentName[] = "__TEXT";
43  // Update indices of special load commands
44  for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) {
45    LoadCommand &LC = LoadCommands[Index];
46    switch (LC.MachOLoadCommand.load_command_data.cmd) {
47    case MachO::LC_CODE_SIGNATURE:
48      CodeSignatureCommandIndex = Index;
49      break;
50    case MachO::LC_SEGMENT:
51      if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) ==
52          TextSegmentName)
53        TextSegmentCommandIndex = Index;
54      break;
55    case MachO::LC_SEGMENT_64:
56      if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) ==
57          TextSegmentName)
58        TextSegmentCommandIndex = Index;
59      break;
60    case MachO::LC_SYMTAB:
61      SymTabCommandIndex = Index;
62      break;
63    case MachO::LC_DYSYMTAB:
64      DySymTabCommandIndex = Index;
65      break;
66    case MachO::LC_DYLD_INFO:
67    case MachO::LC_DYLD_INFO_ONLY:
68      DyLdInfoCommandIndex = Index;
69      break;
70    case MachO::LC_DATA_IN_CODE:
71      DataInCodeCommandIndex = Index;
72      break;
73    case MachO::LC_LINKER_OPTIMIZATION_HINT:
74      LinkerOptimizationHintCommandIndex = Index;
75      break;
76    case MachO::LC_FUNCTION_STARTS:
77      FunctionStartsCommandIndex = Index;
78      break;
79    case MachO::LC_DYLIB_CODE_SIGN_DRS:
80      DylibCodeSignDRsIndex = Index;
81      break;
82    case MachO::LC_DYLD_CHAINED_FIXUPS:
83      ChainedFixupsCommandIndex = Index;
84      break;
85    case MachO::LC_DYLD_EXPORTS_TRIE:
86      ExportsTrieCommandIndex = Index;
87      break;
88    }
89  }
90}
91
92Error Object::removeLoadCommands(
93    function_ref<bool(const LoadCommand &)> ToRemove) {
94  auto It = std::stable_partition(
95      LoadCommands.begin(), LoadCommands.end(),
96      [&](const LoadCommand &LC) { return !ToRemove(LC); });
97  LoadCommands.erase(It, LoadCommands.end());
98
99  updateLoadCommandIndexes();
100  return Error::success();
101}
102
103Error Object::removeSections(
104    function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
105  DenseMap<uint32_t, const Section *> OldIndexToSection;
106  uint32_t NextSectionIndex = 1;
107  for (LoadCommand &LC : LoadCommands) {
108    auto It = std::stable_partition(
109        std::begin(LC.Sections), std::end(LC.Sections),
110        [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); });
111    for (auto I = LC.Sections.begin(), End = It; I != End; ++I) {
112      OldIndexToSection[(*I)->Index] = I->get();
113      (*I)->Index = NextSectionIndex++;
114    }
115    LC.Sections.erase(It, LC.Sections.end());
116  }
117
118  auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool {
119    std::optional<uint32_t> Section = S->section();
120    return (Section && !OldIndexToSection.count(*Section));
121  };
122
123  SmallPtrSet<const SymbolEntry *, 2> DeadSymbols;
124  for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols)
125    if (IsDead(Sym))
126      DeadSymbols.insert(Sym.get());
127
128  for (const LoadCommand &LC : LoadCommands)
129    for (const std::unique_ptr<Section> &Sec : LC.Sections)
130      for (const RelocationInfo &R : Sec->Relocations)
131        if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol))
132          return createStringError(std::errc::invalid_argument,
133                                   "symbol '%s' defined in section with index "
134                                   "'%u' cannot be removed because it is "
135                                   "referenced by a relocation in section '%s'",
136                                   (*R.Symbol)->Name.c_str(),
137                                   *((*R.Symbol)->section()),
138                                   Sec->CanonicalName.c_str());
139  SymTable.removeSymbols(IsDead);
140  for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols)
141    if (S->section())
142      S->n_sect = OldIndexToSection[S->n_sect]->Index;
143  return Error::success();
144}
145
146uint64_t Object::nextAvailableSegmentAddress() const {
147  uint64_t HeaderSize =
148      is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
149  uint64_t Addr = HeaderSize + Header.SizeOfCmds;
150  for (const LoadCommand &LC : LoadCommands) {
151    const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
152    switch (MLC.load_command_data.cmd) {
153    case MachO::LC_SEGMENT:
154      Addr = std::max(Addr,
155                      static_cast<uint64_t>(MLC.segment_command_data.vmaddr) +
156                          MLC.segment_command_data.vmsize);
157      break;
158    case MachO::LC_SEGMENT_64:
159      Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr +
160                                MLC.segment_command_64_data.vmsize);
161      break;
162    default:
163      continue;
164    }
165  }
166  return Addr;
167}
168
169template <typename SegmentType>
170static void
171constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType,
172                 StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) {
173  assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name");
174  memset(&Seg, 0, sizeof(SegmentType));
175  Seg.cmd = CmdType;
176  strncpy(Seg.segname, SegName.data(), SegName.size());
177  Seg.maxprot |=
178      (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
179  Seg.initprot |=
180      (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
181  Seg.vmaddr = SegVMAddr;
182  Seg.vmsize = SegVMSize;
183}
184
185LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) {
186  LoadCommand LC;
187  const uint64_t SegVMAddr = nextAvailableSegmentAddress();
188  if (is64Bit())
189    constructSegment(LC.MachOLoadCommand.segment_command_64_data,
190                     MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize);
191  else
192    constructSegment(LC.MachOLoadCommand.segment_command_data,
193                     MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize);
194
195  LoadCommands.push_back(std::move(LC));
196  return LoadCommands.back();
197}
198
199/// Extracts a segment name from a string which is possibly non-null-terminated.
200static StringRef extractSegmentName(const char *SegName) {
201  return StringRef(SegName,
202                   strnlen(SegName, sizeof(MachO::segment_command::segname)));
203}
204
205std::optional<StringRef> LoadCommand::getSegmentName() const {
206  const MachO::macho_load_command &MLC = MachOLoadCommand;
207  switch (MLC.load_command_data.cmd) {
208  case MachO::LC_SEGMENT:
209    return extractSegmentName(MLC.segment_command_data.segname);
210  case MachO::LC_SEGMENT_64:
211    return extractSegmentName(MLC.segment_command_64_data.segname);
212  default:
213    return std::nullopt;
214  }
215}
216
217std::optional<uint64_t> LoadCommand::getSegmentVMAddr() const {
218  const MachO::macho_load_command &MLC = MachOLoadCommand;
219  switch (MLC.load_command_data.cmd) {
220  case MachO::LC_SEGMENT:
221    return MLC.segment_command_data.vmaddr;
222  case MachO::LC_SEGMENT_64:
223    return MLC.segment_command_64_data.vmaddr;
224  default:
225    return std::nullopt;
226  }
227}
228