1//===- MachOObjcopy.cpp -----------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/ObjCopy/MachO/MachOObjcopy.h"
10#include "Archive.h"
11#include "MachOReader.h"
12#include "MachOWriter.h"
13#include "llvm/ADT/DenseSet.h"
14#include "llvm/ObjCopy/CommonConfig.h"
15#include "llvm/ObjCopy/MachO/MachOConfig.h"
16#include "llvm/ObjCopy/MultiFormatConfig.h"
17#include "llvm/ObjCopy/ObjCopy.h"
18#include "llvm/Object/ArchiveWriter.h"
19#include "llvm/Object/MachOUniversal.h"
20#include "llvm/Object/MachOUniversalWriter.h"
21#include "llvm/Support/Errc.h"
22#include "llvm/Support/Error.h"
23#include "llvm/Support/FileOutputBuffer.h"
24#include "llvm/Support/Path.h"
25#include "llvm/Support/SmallVectorMemoryBuffer.h"
26
27using namespace llvm;
28using namespace llvm::objcopy;
29using namespace llvm::objcopy::macho;
30using namespace llvm::object;
31
// Predicate over a section held by a load command. removeSections() builds one
// that returns true for every section to drop and passes it to
// Object::removeSections.
using SectionPred = std::function<bool(const std::unique_ptr<Section> &Sec)>;
// Predicate over a load command. processLoadCommands() builds one that returns
// true for every load command to drop and passes it to
// Object::removeLoadCommands.
using LoadCommandPred = std::function<bool(const LoadCommand &LC)>;
34
35#ifndef NDEBUG
36static bool isLoadCommandWithPayloadString(const LoadCommand &LC) {
37  // TODO: Add support for LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB and
38  // LC_LAZY_LOAD_DYLIB
39  return LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH ||
40         LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_ID_DYLIB ||
41         LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_DYLIB ||
42         LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_WEAK_DYLIB;
43}
44#endif
45
46static StringRef getPayloadString(const LoadCommand &LC) {
47  assert(isLoadCommandWithPayloadString(LC) &&
48         "unsupported load command encountered");
49
50  return StringRef(reinterpret_cast<const char *>(LC.Payload.data()),
51                   LC.Payload.size())
52      .rtrim('\0');
53}
54
55static Error removeSections(const CommonConfig &Config, Object &Obj) {
56  SectionPred RemovePred = [](const std::unique_ptr<Section> &) {
57    return false;
58  };
59
60  if (!Config.ToRemove.empty()) {
61    RemovePred = [&Config, RemovePred](const std::unique_ptr<Section> &Sec) {
62      return Config.ToRemove.matches(Sec->CanonicalName);
63    };
64  }
65
66  if (Config.StripAll || Config.StripDebug) {
67    // Remove all debug sections.
68    RemovePred = [RemovePred](const std::unique_ptr<Section> &Sec) {
69      if (Sec->Segname == "__DWARF")
70        return true;
71
72      return RemovePred(Sec);
73    };
74  }
75
76  if (!Config.OnlySection.empty()) {
77    // Overwrite RemovePred because --only-section takes priority.
78    RemovePred = [&Config](const std::unique_ptr<Section> &Sec) {
79      return !Config.OnlySection.matches(Sec->CanonicalName);
80    };
81  }
82
83  return Obj.removeSections(RemovePred);
84}
85
86static void markSymbols(const CommonConfig &, Object &Obj) {
87  // Symbols referenced from the indirect symbol table must not be removed.
88  for (IndirectSymbolEntry &ISE : Obj.IndirectSymTable.Symbols)
89    if (ISE.Symbol)
90      (*ISE.Symbol)->Referenced = true;
91}
92
93static void updateAndRemoveSymbols(const CommonConfig &Config,
94                                   const MachOConfig &MachOConfig,
95                                   Object &Obj) {
96  for (SymbolEntry &Sym : Obj.SymTable) {
97    // Weaken symbols first to match ELFObjcopy behavior.
98    bool IsExportedAndDefined =
99        (Sym.n_type & llvm::MachO::N_EXT) &&
100        (Sym.n_type & llvm::MachO::N_TYPE) != llvm::MachO::N_UNDF;
101    if (IsExportedAndDefined &&
102        (Config.Weaken || Config.SymbolsToWeaken.matches(Sym.Name)))
103      Sym.n_desc |= llvm::MachO::N_WEAK_DEF;
104
105    auto I = Config.SymbolsToRename.find(Sym.Name);
106    if (I != Config.SymbolsToRename.end())
107      Sym.Name = std::string(I->getValue());
108  }
109
110  auto RemovePred = [&Config, &MachOConfig,
111                     &Obj](const std::unique_ptr<SymbolEntry> &N) {
112    if (N->Referenced)
113      return false;
114    if (MachOConfig.KeepUndefined && N->isUndefinedSymbol())
115      return false;
116    if (N->n_desc & MachO::REFERENCED_DYNAMICALLY)
117      return false;
118    if (Config.StripAll)
119      return true;
120    if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT))
121      return true;
122    // This behavior is consistent with cctools' strip.
123    if (Config.StripDebug && (N->n_type & MachO::N_STAB))
124      return true;
125    // This behavior is consistent with cctools' strip.
126    if (MachOConfig.StripSwiftSymbols &&
127        (Obj.Header.Flags & MachO::MH_DYLDLINK) && Obj.SwiftVersion &&
128        *Obj.SwiftVersion && N->isSwiftSymbol())
129      return true;
130    return false;
131  };
132
133  Obj.SymTable.removeSymbols(RemovePred);
134}
135
136template <typename LCType>
137static void updateLoadCommandPayloadString(LoadCommand &LC, StringRef S) {
138  assert(isLoadCommandWithPayloadString(LC) &&
139         "unsupported load command encountered");
140
141  uint32_t NewCmdsize = alignTo(sizeof(LCType) + S.size() + 1, 8);
142
143  LC.MachOLoadCommand.load_command_data.cmdsize = NewCmdsize;
144  LC.Payload.assign(NewCmdsize - sizeof(LCType), 0);
145  std::copy(S.begin(), S.end(), LC.Payload.begin());
146}
147
148static LoadCommand buildRPathLoadCommand(StringRef Path) {
149  LoadCommand LC;
150  MachO::rpath_command RPathLC;
151  RPathLC.cmd = MachO::LC_RPATH;
152  RPathLC.path = sizeof(MachO::rpath_command);
153  RPathLC.cmdsize = alignTo(sizeof(MachO::rpath_command) + Path.size() + 1, 8);
154  LC.MachOLoadCommand.rpath_command_data = RPathLC;
155  LC.Payload.assign(RPathLC.cmdsize - sizeof(MachO::rpath_command), 0);
156  std::copy(Path.begin(), Path.end(), LC.Payload.begin());
157  return LC;
158}
159
160static Error processLoadCommands(const MachOConfig &MachOConfig, Object &Obj) {
161  // Remove RPaths.
162  DenseSet<StringRef> RPathsToRemove(MachOConfig.RPathsToRemove.begin(),
163                                     MachOConfig.RPathsToRemove.end());
164
165  LoadCommandPred RemovePred = [&RPathsToRemove,
166                                &MachOConfig](const LoadCommand &LC) {
167    if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) {
168      // When removing all RPaths we don't need to care
169      // about what it contains
170      if (MachOConfig.RemoveAllRpaths)
171        return true;
172
173      StringRef RPath = getPayloadString(LC);
174      if (RPathsToRemove.count(RPath)) {
175        RPathsToRemove.erase(RPath);
176        return true;
177      }
178    }
179    return false;
180  };
181
182  if (Error E = Obj.removeLoadCommands(RemovePred))
183    return E;
184
185  // Emit an error if the Mach-O binary does not contain an rpath path name
186  // specified in -delete_rpath.
187  for (StringRef RPath : MachOConfig.RPathsToRemove) {
188    if (RPathsToRemove.count(RPath))
189      return createStringError(errc::invalid_argument,
190                               "no LC_RPATH load command with path: %s",
191                               RPath.str().c_str());
192  }
193
194  DenseSet<StringRef> RPaths;
195
196  // Get all existing RPaths.
197  for (LoadCommand &LC : Obj.LoadCommands) {
198    if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH)
199      RPaths.insert(getPayloadString(LC));
200  }
201
202  // Throw errors for invalid RPaths.
203  for (const auto &OldNew : MachOConfig.RPathsToUpdate) {
204    StringRef Old = OldNew.getFirst();
205    StringRef New = OldNew.getSecond();
206    if (!RPaths.contains(Old))
207      return createStringError(errc::invalid_argument,
208                               "no LC_RPATH load command with path: " + Old);
209    if (RPaths.contains(New))
210      return createStringError(errc::invalid_argument,
211                               "rpath '" + New +
212                                   "' would create a duplicate load command");
213  }
214
215  // Update load commands.
216  for (LoadCommand &LC : Obj.LoadCommands) {
217    switch (LC.MachOLoadCommand.load_command_data.cmd) {
218    case MachO::LC_ID_DYLIB:
219      if (MachOConfig.SharedLibId)
220        updateLoadCommandPayloadString<MachO::dylib_command>(
221            LC, *MachOConfig.SharedLibId);
222      break;
223
224    case MachO::LC_RPATH: {
225      StringRef RPath = getPayloadString(LC);
226      StringRef NewRPath = MachOConfig.RPathsToUpdate.lookup(RPath);
227      if (!NewRPath.empty())
228        updateLoadCommandPayloadString<MachO::rpath_command>(LC, NewRPath);
229      break;
230    }
231
232    // TODO: Add LC_REEXPORT_DYLIB, LC_LAZY_LOAD_DYLIB, and LC_LOAD_UPWARD_DYLIB
233    // here once llvm-objcopy supports them.
234    case MachO::LC_LOAD_DYLIB:
235    case MachO::LC_LOAD_WEAK_DYLIB:
236      StringRef InstallName = getPayloadString(LC);
237      StringRef NewInstallName =
238          MachOConfig.InstallNamesToUpdate.lookup(InstallName);
239      if (!NewInstallName.empty())
240        updateLoadCommandPayloadString<MachO::dylib_command>(LC,
241                                                             NewInstallName);
242      break;
243    }
244  }
245
246  // Add new RPaths.
247  for (StringRef RPath : MachOConfig.RPathToAdd) {
248    if (RPaths.contains(RPath))
249      return createStringError(errc::invalid_argument,
250                               "rpath '" + RPath +
251                                   "' would create a duplicate load command");
252    RPaths.insert(RPath);
253    Obj.LoadCommands.push_back(buildRPathLoadCommand(RPath));
254  }
255
256  for (StringRef RPath : MachOConfig.RPathToPrepend) {
257    if (RPaths.contains(RPath))
258      return createStringError(errc::invalid_argument,
259                               "rpath '" + RPath +
260                                   "' would create a duplicate load command");
261
262    RPaths.insert(RPath);
263    Obj.LoadCommands.insert(Obj.LoadCommands.begin(),
264                            buildRPathLoadCommand(RPath));
265  }
266
267  // Unlike appending rpaths, the indexes of subsequent load commands must
268  // be recalculated after prepending one.
269  if (!MachOConfig.RPathToPrepend.empty())
270    Obj.updateLoadCommandIndexes();
271
272  // Remove any empty segments if required.
273  if (!MachOConfig.EmptySegmentsToRemove.empty()) {
274    auto RemovePred = [&MachOConfig](const LoadCommand &LC) {
275      if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT_64 ||
276          LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT) {
277        return LC.Sections.empty() &&
278               MachOConfig.EmptySegmentsToRemove.contains(*LC.getSegmentName());
279      }
280      return false;
281    };
282    if (Error E = Obj.removeLoadCommands(RemovePred))
283      return E;
284  }
285
286  return Error::success();
287}
288
289static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
290                               Object &Obj) {
291  for (LoadCommand &LC : Obj.LoadCommands)
292    for (const std::unique_ptr<Section> &Sec : LC.Sections) {
293      if (Sec->CanonicalName == SecName) {
294        Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
295            FileOutputBuffer::create(Filename, Sec->Content.size());
296        if (!BufferOrErr)
297          return BufferOrErr.takeError();
298        std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
299        llvm::copy(Sec->Content, Buf->getBufferStart());
300
301        if (Error E = Buf->commit())
302          return E;
303        return Error::success();
304      }
305    }
306
307  return createStringError(object_error::parse_failed, "section '%s' not found",
308                           SecName.str().c_str());
309}
310
311static Error addSection(const NewSectionInfo &NewSection, Object &Obj) {
312  std::pair<StringRef, StringRef> Pair = NewSection.SectionName.split(',');
313  StringRef TargetSegName = Pair.first;
314  Section Sec(TargetSegName, Pair.second);
315  Sec.Content =
316      Obj.NewSectionsContents.save(NewSection.SectionData->getBuffer());
317  Sec.Size = Sec.Content.size();
318
319  // Add the a section into an existing segment.
320  for (LoadCommand &LC : Obj.LoadCommands) {
321    std::optional<StringRef> SegName = LC.getSegmentName();
322    if (SegName && SegName == TargetSegName) {
323      uint64_t Addr = *LC.getSegmentVMAddr();
324      for (const std::unique_ptr<Section> &S : LC.Sections)
325        Addr = std::max(Addr, S->Addr + S->Size);
326      LC.Sections.push_back(std::make_unique<Section>(Sec));
327      LC.Sections.back()->Addr = Addr;
328      return Error::success();
329    }
330  }
331
332  // There's no segment named TargetSegName. Create a new load command and
333  // Insert a new section into it.
334  LoadCommand &NewSegment =
335      Obj.addSegment(TargetSegName, alignTo(Sec.Size, 16384));
336  NewSegment.Sections.push_back(std::make_unique<Section>(Sec));
337  NewSegment.Sections.back()->Addr = *NewSegment.getSegmentVMAddr();
338  return Error::success();
339}
340
341static Expected<Section &> findSection(StringRef SecName, Object &O) {
342  StringRef SegName;
343  std::tie(SegName, SecName) = SecName.split(",");
344  auto FoundSeg =
345      llvm::find_if(O.LoadCommands, [SegName](const LoadCommand &LC) {
346        return LC.getSegmentName() == SegName;
347      });
348  if (FoundSeg == O.LoadCommands.end())
349    return createStringError(errc::invalid_argument,
350                             "could not find segment with name '%s'",
351                             SegName.str().c_str());
352  auto FoundSec = llvm::find_if(FoundSeg->Sections,
353                                [SecName](const std::unique_ptr<Section> &Sec) {
354                                  return Sec->Sectname == SecName;
355                                });
356  if (FoundSec == FoundSeg->Sections.end())
357    return createStringError(errc::invalid_argument,
358                             "could not find section with name '%s'",
359                             SecName.str().c_str());
360
361  assert(FoundSec->get()->CanonicalName == (SegName + "," + SecName).str());
362  return **FoundSec;
363}
364
365static Error updateSection(const NewSectionInfo &NewSection, Object &O) {
366  Expected<Section &> SecToUpdateOrErr = findSection(NewSection.SectionName, O);
367
368  if (!SecToUpdateOrErr)
369    return SecToUpdateOrErr.takeError();
370  Section &Sec = *SecToUpdateOrErr;
371
372  if (NewSection.SectionData->getBufferSize() > Sec.Size)
373    return createStringError(
374        errc::invalid_argument,
375        "new section cannot be larger than previous section");
376  Sec.Content = O.NewSectionsContents.save(NewSection.SectionData->getBuffer());
377  Sec.Size = Sec.Content.size();
378  return Error::success();
379}
380
381// isValidMachOCannonicalName returns success if Name is a MachO cannonical name
382// ("<segment>,<section>") and lengths of both segment and section names are
383// valid.
384static Error isValidMachOCannonicalName(StringRef Name) {
385  if (Name.count(',') != 1)
386    return createStringError(errc::invalid_argument,
387                             "invalid section name '%s' (should be formatted "
388                             "as '<segment name>,<section name>')",
389                             Name.str().c_str());
390
391  std::pair<StringRef, StringRef> Pair = Name.split(',');
392  if (Pair.first.size() > 16)
393    return createStringError(errc::invalid_argument,
394                             "too long segment name: '%s'",
395                             Pair.first.str().c_str());
396  if (Pair.second.size() > 16)
397    return createStringError(errc::invalid_argument,
398                             "too long section name: '%s'",
399                             Pair.second.str().c_str());
400  return Error::success();
401}
402
403static Error handleArgs(const CommonConfig &Config,
404                        const MachOConfig &MachOConfig, Object &Obj) {
405  // Dump sections before add/remove for compatibility with GNU objcopy.
406  for (StringRef Flag : Config.DumpSection) {
407    StringRef SectionName;
408    StringRef FileName;
409    std::tie(SectionName, FileName) = Flag.split('=');
410    if (Error E = dumpSectionToFile(SectionName, FileName, Obj))
411      return E;
412  }
413
414  if (Error E = removeSections(Config, Obj))
415    return E;
416
417  // Mark symbols to determine which symbols are still needed.
418  if (Config.StripAll)
419    markSymbols(Config, Obj);
420
421  updateAndRemoveSymbols(Config, MachOConfig, Obj);
422
423  if (Config.StripAll)
424    for (LoadCommand &LC : Obj.LoadCommands)
425      for (std::unique_ptr<Section> &Sec : LC.Sections)
426        Sec->Relocations.clear();
427
428  for (const NewSectionInfo &NewSection : Config.AddSection) {
429    if (Error E = isValidMachOCannonicalName(NewSection.SectionName))
430      return E;
431    if (Error E = addSection(NewSection, Obj))
432      return E;
433  }
434
435  for (const NewSectionInfo &NewSection : Config.UpdateSection) {
436    if (Error E = isValidMachOCannonicalName(NewSection.SectionName))
437      return E;
438    if (Error E = updateSection(NewSection, Obj))
439      return E;
440  }
441
442  if (Error E = processLoadCommands(MachOConfig, Obj))
443    return E;
444
445  return Error::success();
446}
447
448Error objcopy::macho::executeObjcopyOnBinary(const CommonConfig &Config,
449                                             const MachOConfig &MachOConfig,
450                                             object::MachOObjectFile &In,
451                                             raw_ostream &Out) {
452  MachOReader Reader(In);
453  Expected<std::unique_ptr<Object>> O = Reader.create();
454  if (!O)
455    return createFileError(Config.InputFilename, O.takeError());
456
457  if (O->get()->Header.FileType == MachO::HeaderFileType::MH_PRELOAD)
458    return createStringError(std::errc::not_supported,
459                             "%s: MH_PRELOAD files are not supported",
460                             Config.InputFilename.str().c_str());
461
462  if (Error E = handleArgs(Config, MachOConfig, **O))
463    return createFileError(Config.InputFilename, std::move(E));
464
465  // Page size used for alignment of segment sizes in Mach-O executables and
466  // dynamic libraries.
467  uint64_t PageSize;
468  switch (In.getArch()) {
469  case Triple::ArchType::arm:
470  case Triple::ArchType::aarch64:
471  case Triple::ArchType::aarch64_32:
472    PageSize = 16384;
473    break;
474  default:
475    PageSize = 4096;
476  }
477
478  MachOWriter Writer(**O, In.is64Bit(), In.isLittleEndian(),
479                     sys::path::filename(Config.OutputFilename), PageSize, Out);
480  if (auto E = Writer.finalize())
481    return E;
482  return Writer.write();
483}
484
485Error objcopy::macho::executeObjcopyOnMachOUniversalBinary(
486    const MultiFormatConfig &Config, const MachOUniversalBinary &In,
487    raw_ostream &Out) {
488  SmallVector<OwningBinary<Binary>, 2> Binaries;
489  SmallVector<Slice, 2> Slices;
490  for (const auto &O : In.objects()) {
491    Expected<std::unique_ptr<Archive>> ArOrErr = O.getAsArchive();
492    if (ArOrErr) {
493      Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr =
494          createNewArchiveMembers(Config, **ArOrErr);
495      if (!NewArchiveMembersOrErr)
496        return NewArchiveMembersOrErr.takeError();
497      auto Kind = (*ArOrErr)->kind();
498      if (Kind == object::Archive::K_BSD)
499        Kind = object::Archive::K_DARWIN;
500      Expected<std::unique_ptr<MemoryBuffer>> OutputBufferOrErr =
501          writeArchiveToBuffer(
502              *NewArchiveMembersOrErr,
503              (*ArOrErr)->hasSymbolTable() ? SymtabWritingMode::NormalSymtab
504                                           : SymtabWritingMode::NoSymtab,
505              Kind, Config.getCommonConfig().DeterministicArchives,
506              (*ArOrErr)->isThin());
507      if (!OutputBufferOrErr)
508        return OutputBufferOrErr.takeError();
509      Expected<std::unique_ptr<Binary>> BinaryOrErr =
510          object::createBinary(**OutputBufferOrErr);
511      if (!BinaryOrErr)
512        return BinaryOrErr.takeError();
513      Binaries.emplace_back(std::move(*BinaryOrErr),
514                            std::move(*OutputBufferOrErr));
515      Slices.emplace_back(*cast<Archive>(Binaries.back().getBinary()),
516                          O.getCPUType(), O.getCPUSubType(),
517                          O.getArchFlagName(), O.getAlign());
518      continue;
519    }
520    // The methods getAsArchive, getAsObjectFile, getAsIRObject of the class
521    // ObjectForArch return an Error in case of the type mismatch. We need to
522    // check each in turn to see what kind of slice this is, so ignore errors
523    // produced along the way.
524    consumeError(ArOrErr.takeError());
525
526    Expected<std::unique_ptr<MachOObjectFile>> ObjOrErr = O.getAsObjectFile();
527    if (!ObjOrErr) {
528      consumeError(ObjOrErr.takeError());
529      return createStringError(
530          std::errc::invalid_argument,
531          "slice for '%s' of the universal Mach-O binary "
532          "'%s' is not a Mach-O object or an archive",
533          O.getArchFlagName().c_str(),
534          Config.getCommonConfig().InputFilename.str().c_str());
535    }
536    std::string ArchFlagName = O.getArchFlagName();
537
538    SmallVector<char, 0> Buffer;
539    raw_svector_ostream MemStream(Buffer);
540
541    Expected<const MachOConfig &> MachO = Config.getMachOConfig();
542    if (!MachO)
543      return MachO.takeError();
544
545    if (Error E = executeObjcopyOnBinary(Config.getCommonConfig(), *MachO,
546                                         **ObjOrErr, MemStream))
547      return E;
548
549    auto MB = std::make_unique<SmallVectorMemoryBuffer>(
550        std::move(Buffer), ArchFlagName, /*RequiresNullTerminator=*/false);
551    Expected<std::unique_ptr<Binary>> BinaryOrErr = object::createBinary(*MB);
552    if (!BinaryOrErr)
553      return BinaryOrErr.takeError();
554    Binaries.emplace_back(std::move(*BinaryOrErr), std::move(MB));
555    Slices.emplace_back(*cast<MachOObjectFile>(Binaries.back().getBinary()),
556                        O.getAlign());
557  }
558
559  if (Error Err = writeUniversalBinaryToStream(Slices, Out))
560    return Err;
561
562  return Error::success();
563}
564