1//===- MachOObjcopy.cpp -----------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/ObjCopy/MachO/MachOObjcopy.h"
10#include "Archive.h"
11#include "MachOReader.h"
12#include "MachOWriter.h"
13#include "llvm/ADT/DenseSet.h"
14#include "llvm/ObjCopy/CommonConfig.h"
15#include "llvm/ObjCopy/MachO/MachOConfig.h"
16#include "llvm/ObjCopy/MultiFormatConfig.h"
17#include "llvm/ObjCopy/ObjCopy.h"
18#include "llvm/Object/ArchiveWriter.h"
19#include "llvm/Object/MachOUniversal.h"
20#include "llvm/Object/MachOUniversalWriter.h"
21#include "llvm/Support/Errc.h"
22#include "llvm/Support/Error.h"
23#include "llvm/Support/FileOutputBuffer.h"
24#include "llvm/Support/Path.h"
25#include "llvm/Support/SmallVectorMemoryBuffer.h"
26
27using namespace llvm;
28using namespace llvm::objcopy;
29using namespace llvm::objcopy::macho;
30using namespace llvm::object;
31
// Predicate over a section; removeSections() builds one and passes it to
// Object::removeSections to select what gets dropped.
using SectionPred = std::function<bool(const std::unique_ptr<Section> &Sec)>;
// Predicate over a load command; processLoadCommands() passes one to
// Object::removeLoadCommands to select what gets dropped.
using LoadCommandPred = std::function<bool(const LoadCommand &LC)>;
34
35#ifndef NDEBUG
36static bool isLoadCommandWithPayloadString(const LoadCommand &LC) {
37  // TODO: Add support for LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB and
38  // LC_LAZY_LOAD_DYLIB
39  return LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH ||
40         LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_ID_DYLIB ||
41         LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_DYLIB ||
42         LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_WEAK_DYLIB;
43}
44#endif
45
46static StringRef getPayloadString(const LoadCommand &LC) {
47  assert(isLoadCommandWithPayloadString(LC) &&
48         "unsupported load command encountered");
49
50  return StringRef(reinterpret_cast<const char *>(LC.Payload.data()),
51                   LC.Payload.size())
52      .rtrim('\0');
53}
54
55static Error removeSections(const CommonConfig &Config, Object &Obj) {
56  SectionPred RemovePred = [](const std::unique_ptr<Section> &) {
57    return false;
58  };
59
60  if (!Config.ToRemove.empty()) {
61    RemovePred = [&Config, RemovePred](const std::unique_ptr<Section> &Sec) {
62      return Config.ToRemove.matches(Sec->CanonicalName);
63    };
64  }
65
66  if (Config.StripAll || Config.StripDebug) {
67    // Remove all debug sections.
68    RemovePred = [RemovePred](const std::unique_ptr<Section> &Sec) {
69      if (Sec->Segname == "__DWARF")
70        return true;
71
72      return RemovePred(Sec);
73    };
74  }
75
76  if (!Config.OnlySection.empty()) {
77    // Overwrite RemovePred because --only-section takes priority.
78    RemovePred = [&Config](const std::unique_ptr<Section> &Sec) {
79      return !Config.OnlySection.matches(Sec->CanonicalName);
80    };
81  }
82
83  return Obj.removeSections(RemovePred);
84}
85
86static void markSymbols(const CommonConfig &, Object &Obj) {
87  // Symbols referenced from the indirect symbol table must not be removed.
88  for (IndirectSymbolEntry &ISE : Obj.IndirectSymTable.Symbols)
89    if (ISE.Symbol)
90      (*ISE.Symbol)->Referenced = true;
91}
92
93static void updateAndRemoveSymbols(const CommonConfig &Config,
94                                   const MachOConfig &MachOConfig,
95                                   Object &Obj) {
96  for (SymbolEntry &Sym : Obj.SymTable) {
97    auto I = Config.SymbolsToRename.find(Sym.Name);
98    if (I != Config.SymbolsToRename.end())
99      Sym.Name = std::string(I->getValue());
100  }
101
102  auto RemovePred = [&Config, &MachOConfig,
103                     &Obj](const std::unique_ptr<SymbolEntry> &N) {
104    if (N->Referenced)
105      return false;
106    if (MachOConfig.KeepUndefined && N->isUndefinedSymbol())
107      return false;
108    if (N->n_desc & MachO::REFERENCED_DYNAMICALLY)
109      return false;
110    if (Config.StripAll)
111      return true;
112    if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT))
113      return true;
114    // This behavior is consistent with cctools' strip.
115    if (MachOConfig.StripSwiftSymbols &&
116        (Obj.Header.Flags & MachO::MH_DYLDLINK) && Obj.SwiftVersion &&
117        *Obj.SwiftVersion && N->isSwiftSymbol())
118      return true;
119    return false;
120  };
121
122  Obj.SymTable.removeSymbols(RemovePred);
123}
124
125template <typename LCType>
126static void updateLoadCommandPayloadString(LoadCommand &LC, StringRef S) {
127  assert(isLoadCommandWithPayloadString(LC) &&
128         "unsupported load command encountered");
129
130  uint32_t NewCmdsize = alignTo(sizeof(LCType) + S.size() + 1, 8);
131
132  LC.MachOLoadCommand.load_command_data.cmdsize = NewCmdsize;
133  LC.Payload.assign(NewCmdsize - sizeof(LCType), 0);
134  std::copy(S.begin(), S.end(), LC.Payload.begin());
135}
136
137static LoadCommand buildRPathLoadCommand(StringRef Path) {
138  LoadCommand LC;
139  MachO::rpath_command RPathLC;
140  RPathLC.cmd = MachO::LC_RPATH;
141  RPathLC.path = sizeof(MachO::rpath_command);
142  RPathLC.cmdsize = alignTo(sizeof(MachO::rpath_command) + Path.size() + 1, 8);
143  LC.MachOLoadCommand.rpath_command_data = RPathLC;
144  LC.Payload.assign(RPathLC.cmdsize - sizeof(MachO::rpath_command), 0);
145  std::copy(Path.begin(), Path.end(), LC.Payload.begin());
146  return LC;
147}
148
149static Error processLoadCommands(const MachOConfig &MachOConfig, Object &Obj) {
150  // Remove RPaths.
151  DenseSet<StringRef> RPathsToRemove(MachOConfig.RPathsToRemove.begin(),
152                                     MachOConfig.RPathsToRemove.end());
153
154  LoadCommandPred RemovePred = [&RPathsToRemove,
155                                &MachOConfig](const LoadCommand &LC) {
156    if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) {
157      // When removing all RPaths we don't need to care
158      // about what it contains
159      if (MachOConfig.RemoveAllRpaths)
160        return true;
161
162      StringRef RPath = getPayloadString(LC);
163      if (RPathsToRemove.count(RPath)) {
164        RPathsToRemove.erase(RPath);
165        return true;
166      }
167    }
168    return false;
169  };
170
171  if (Error E = Obj.removeLoadCommands(RemovePred))
172    return E;
173
174  // Emit an error if the Mach-O binary does not contain an rpath path name
175  // specified in -delete_rpath.
176  for (StringRef RPath : MachOConfig.RPathsToRemove) {
177    if (RPathsToRemove.count(RPath))
178      return createStringError(errc::invalid_argument,
179                               "no LC_RPATH load command with path: %s",
180                               RPath.str().c_str());
181  }
182
183  DenseSet<StringRef> RPaths;
184
185  // Get all existing RPaths.
186  for (LoadCommand &LC : Obj.LoadCommands) {
187    if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH)
188      RPaths.insert(getPayloadString(LC));
189  }
190
191  // Throw errors for invalid RPaths.
192  for (const auto &OldNew : MachOConfig.RPathsToUpdate) {
193    StringRef Old = OldNew.getFirst();
194    StringRef New = OldNew.getSecond();
195    if (!RPaths.contains(Old))
196      return createStringError(errc::invalid_argument,
197                               "no LC_RPATH load command with path: " + Old);
198    if (RPaths.contains(New))
199      return createStringError(errc::invalid_argument,
200                               "rpath '" + New +
201                                   "' would create a duplicate load command");
202  }
203
204  // Update load commands.
205  for (LoadCommand &LC : Obj.LoadCommands) {
206    switch (LC.MachOLoadCommand.load_command_data.cmd) {
207    case MachO::LC_ID_DYLIB:
208      if (MachOConfig.SharedLibId)
209        updateLoadCommandPayloadString<MachO::dylib_command>(
210            LC, *MachOConfig.SharedLibId);
211      break;
212
213    case MachO::LC_RPATH: {
214      StringRef RPath = getPayloadString(LC);
215      StringRef NewRPath = MachOConfig.RPathsToUpdate.lookup(RPath);
216      if (!NewRPath.empty())
217        updateLoadCommandPayloadString<MachO::rpath_command>(LC, NewRPath);
218      break;
219    }
220
221    // TODO: Add LC_REEXPORT_DYLIB, LC_LAZY_LOAD_DYLIB, and LC_LOAD_UPWARD_DYLIB
222    // here once llvm-objcopy supports them.
223    case MachO::LC_LOAD_DYLIB:
224    case MachO::LC_LOAD_WEAK_DYLIB:
225      StringRef InstallName = getPayloadString(LC);
226      StringRef NewInstallName =
227          MachOConfig.InstallNamesToUpdate.lookup(InstallName);
228      if (!NewInstallName.empty())
229        updateLoadCommandPayloadString<MachO::dylib_command>(LC,
230                                                             NewInstallName);
231      break;
232    }
233  }
234
235  // Add new RPaths.
236  for (StringRef RPath : MachOConfig.RPathToAdd) {
237    if (RPaths.contains(RPath))
238      return createStringError(errc::invalid_argument,
239                               "rpath '" + RPath +
240                                   "' would create a duplicate load command");
241    RPaths.insert(RPath);
242    Obj.LoadCommands.push_back(buildRPathLoadCommand(RPath));
243  }
244
245  for (StringRef RPath : MachOConfig.RPathToPrepend) {
246    if (RPaths.contains(RPath))
247      return createStringError(errc::invalid_argument,
248                               "rpath '" + RPath +
249                                   "' would create a duplicate load command");
250
251    RPaths.insert(RPath);
252    Obj.LoadCommands.insert(Obj.LoadCommands.begin(),
253                            buildRPathLoadCommand(RPath));
254  }
255
256  // Unlike appending rpaths, the indexes of subsequent load commands must
257  // be recalculated after prepending one.
258  if (!MachOConfig.RPathToPrepend.empty())
259    Obj.updateLoadCommandIndexes();
260
261  // Remove any empty segments if required.
262  if (!MachOConfig.EmptySegmentsToRemove.empty()) {
263    auto RemovePred = [&MachOConfig](const LoadCommand &LC) {
264      if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT_64 ||
265          LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT) {
266        return LC.Sections.empty() &&
267               MachOConfig.EmptySegmentsToRemove.contains(*LC.getSegmentName());
268      }
269      return false;
270    };
271    if (Error E = Obj.removeLoadCommands(RemovePred))
272      return E;
273  }
274
275  return Error::success();
276}
277
278static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
279                               Object &Obj) {
280  for (LoadCommand &LC : Obj.LoadCommands)
281    for (const std::unique_ptr<Section> &Sec : LC.Sections) {
282      if (Sec->CanonicalName == SecName) {
283        Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
284            FileOutputBuffer::create(Filename, Sec->Content.size());
285        if (!BufferOrErr)
286          return BufferOrErr.takeError();
287        std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
288        llvm::copy(Sec->Content, Buf->getBufferStart());
289
290        if (Error E = Buf->commit())
291          return E;
292        return Error::success();
293      }
294    }
295
296  return createStringError(object_error::parse_failed, "section '%s' not found",
297                           SecName.str().c_str());
298}
299
300static Error addSection(const NewSectionInfo &NewSection, Object &Obj) {
301  std::pair<StringRef, StringRef> Pair = NewSection.SectionName.split(',');
302  StringRef TargetSegName = Pair.first;
303  Section Sec(TargetSegName, Pair.second);
304  Sec.Content =
305      Obj.NewSectionsContents.save(NewSection.SectionData->getBuffer());
306  Sec.Size = Sec.Content.size();
307
308  // Add the a section into an existing segment.
309  for (LoadCommand &LC : Obj.LoadCommands) {
310    std::optional<StringRef> SegName = LC.getSegmentName();
311    if (SegName && SegName == TargetSegName) {
312      uint64_t Addr = *LC.getSegmentVMAddr();
313      for (const std::unique_ptr<Section> &S : LC.Sections)
314        Addr = std::max(Addr, S->Addr + S->Size);
315      LC.Sections.push_back(std::make_unique<Section>(Sec));
316      LC.Sections.back()->Addr = Addr;
317      return Error::success();
318    }
319  }
320
321  // There's no segment named TargetSegName. Create a new load command and
322  // Insert a new section into it.
323  LoadCommand &NewSegment =
324      Obj.addSegment(TargetSegName, alignTo(Sec.Size, 16384));
325  NewSegment.Sections.push_back(std::make_unique<Section>(Sec));
326  NewSegment.Sections.back()->Addr = *NewSegment.getSegmentVMAddr();
327  return Error::success();
328}
329
330static Expected<Section &> findSection(StringRef SecName, Object &O) {
331  StringRef SegName;
332  std::tie(SegName, SecName) = SecName.split(",");
333  auto FoundSeg =
334      llvm::find_if(O.LoadCommands, [SegName](const LoadCommand &LC) {
335        return LC.getSegmentName() == SegName;
336      });
337  if (FoundSeg == O.LoadCommands.end())
338    return createStringError(errc::invalid_argument,
339                             "could not find segment with name '%s'",
340                             SegName.str().c_str());
341  auto FoundSec = llvm::find_if(FoundSeg->Sections,
342                                [SecName](const std::unique_ptr<Section> &Sec) {
343                                  return Sec->Sectname == SecName;
344                                });
345  if (FoundSec == FoundSeg->Sections.end())
346    return createStringError(errc::invalid_argument,
347                             "could not find section with name '%s'",
348                             SecName.str().c_str());
349
350  assert(FoundSec->get()->CanonicalName == (SegName + "," + SecName).str());
351  return **FoundSec;
352}
353
354static Error updateSection(const NewSectionInfo &NewSection, Object &O) {
355  Expected<Section &> SecToUpdateOrErr = findSection(NewSection.SectionName, O);
356
357  if (!SecToUpdateOrErr)
358    return SecToUpdateOrErr.takeError();
359  Section &Sec = *SecToUpdateOrErr;
360
361  if (NewSection.SectionData->getBufferSize() > Sec.Size)
362    return createStringError(
363        errc::invalid_argument,
364        "new section cannot be larger than previous section");
365  Sec.Content = O.NewSectionsContents.save(NewSection.SectionData->getBuffer());
366  Sec.Size = Sec.Content.size();
367  return Error::success();
368}
369
370// isValidMachOCannonicalName returns success if Name is a MachO cannonical name
371// ("<segment>,<section>") and lengths of both segment and section names are
372// valid.
373static Error isValidMachOCannonicalName(StringRef Name) {
374  if (Name.count(',') != 1)
375    return createStringError(errc::invalid_argument,
376                             "invalid section name '%s' (should be formatted "
377                             "as '<segment name>,<section name>')",
378                             Name.str().c_str());
379
380  std::pair<StringRef, StringRef> Pair = Name.split(',');
381  if (Pair.first.size() > 16)
382    return createStringError(errc::invalid_argument,
383                             "too long segment name: '%s'",
384                             Pair.first.str().c_str());
385  if (Pair.second.size() > 16)
386    return createStringError(errc::invalid_argument,
387                             "too long section name: '%s'",
388                             Pair.second.str().c_str());
389  return Error::success();
390}
391
392static Error handleArgs(const CommonConfig &Config,
393                        const MachOConfig &MachOConfig, Object &Obj) {
394  // Dump sections before add/remove for compatibility with GNU objcopy.
395  for (StringRef Flag : Config.DumpSection) {
396    StringRef SectionName;
397    StringRef FileName;
398    std::tie(SectionName, FileName) = Flag.split('=');
399    if (Error E = dumpSectionToFile(SectionName, FileName, Obj))
400      return E;
401  }
402
403  if (Error E = removeSections(Config, Obj))
404    return E;
405
406  // Mark symbols to determine which symbols are still needed.
407  if (Config.StripAll)
408    markSymbols(Config, Obj);
409
410  updateAndRemoveSymbols(Config, MachOConfig, Obj);
411
412  if (Config.StripAll)
413    for (LoadCommand &LC : Obj.LoadCommands)
414      for (std::unique_ptr<Section> &Sec : LC.Sections)
415        Sec->Relocations.clear();
416
417  for (const NewSectionInfo &NewSection : Config.AddSection) {
418    if (Error E = isValidMachOCannonicalName(NewSection.SectionName))
419      return E;
420    if (Error E = addSection(NewSection, Obj))
421      return E;
422  }
423
424  for (const NewSectionInfo &NewSection : Config.UpdateSection) {
425    if (Error E = isValidMachOCannonicalName(NewSection.SectionName))
426      return E;
427    if (Error E = updateSection(NewSection, Obj))
428      return E;
429  }
430
431  if (Error E = processLoadCommands(MachOConfig, Obj))
432    return E;
433
434  return Error::success();
435}
436
437Error objcopy::macho::executeObjcopyOnBinary(const CommonConfig &Config,
438                                             const MachOConfig &MachOConfig,
439                                             object::MachOObjectFile &In,
440                                             raw_ostream &Out) {
441  MachOReader Reader(In);
442  Expected<std::unique_ptr<Object>> O = Reader.create();
443  if (!O)
444    return createFileError(Config.InputFilename, O.takeError());
445
446  if (O->get()->Header.FileType == MachO::HeaderFileType::MH_PRELOAD)
447    return createStringError(std::errc::not_supported,
448                             "%s: MH_PRELOAD files are not supported",
449                             Config.InputFilename.str().c_str());
450
451  if (Error E = handleArgs(Config, MachOConfig, **O))
452    return createFileError(Config.InputFilename, std::move(E));
453
454  // Page size used for alignment of segment sizes in Mach-O executables and
455  // dynamic libraries.
456  uint64_t PageSize;
457  switch (In.getArch()) {
458  case Triple::ArchType::arm:
459  case Triple::ArchType::aarch64:
460  case Triple::ArchType::aarch64_32:
461    PageSize = 16384;
462    break;
463  default:
464    PageSize = 4096;
465  }
466
467  MachOWriter Writer(**O, In.is64Bit(), In.isLittleEndian(),
468                     sys::path::filename(Config.OutputFilename), PageSize, Out);
469  if (auto E = Writer.finalize())
470    return E;
471  return Writer.write();
472}
473
474Error objcopy::macho::executeObjcopyOnMachOUniversalBinary(
475    const MultiFormatConfig &Config, const MachOUniversalBinary &In,
476    raw_ostream &Out) {
477  SmallVector<OwningBinary<Binary>, 2> Binaries;
478  SmallVector<Slice, 2> Slices;
479  for (const auto &O : In.objects()) {
480    Expected<std::unique_ptr<Archive>> ArOrErr = O.getAsArchive();
481    if (ArOrErr) {
482      Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr =
483          createNewArchiveMembers(Config, **ArOrErr);
484      if (!NewArchiveMembersOrErr)
485        return NewArchiveMembersOrErr.takeError();
486      auto Kind = (*ArOrErr)->kind();
487      if (Kind == object::Archive::K_BSD)
488        Kind = object::Archive::K_DARWIN;
489      Expected<std::unique_ptr<MemoryBuffer>> OutputBufferOrErr =
490          writeArchiveToBuffer(*NewArchiveMembersOrErr,
491                               (*ArOrErr)->hasSymbolTable(), Kind,
492                               Config.getCommonConfig().DeterministicArchives,
493                               (*ArOrErr)->isThin());
494      if (!OutputBufferOrErr)
495        return OutputBufferOrErr.takeError();
496      Expected<std::unique_ptr<Binary>> BinaryOrErr =
497          object::createBinary(**OutputBufferOrErr);
498      if (!BinaryOrErr)
499        return BinaryOrErr.takeError();
500      Binaries.emplace_back(std::move(*BinaryOrErr),
501                            std::move(*OutputBufferOrErr));
502      Slices.emplace_back(*cast<Archive>(Binaries.back().getBinary()),
503                          O.getCPUType(), O.getCPUSubType(),
504                          O.getArchFlagName(), O.getAlign());
505      continue;
506    }
507    // The methods getAsArchive, getAsObjectFile, getAsIRObject of the class
508    // ObjectForArch return an Error in case of the type mismatch. We need to
509    // check each in turn to see what kind of slice this is, so ignore errors
510    // produced along the way.
511    consumeError(ArOrErr.takeError());
512
513    Expected<std::unique_ptr<MachOObjectFile>> ObjOrErr = O.getAsObjectFile();
514    if (!ObjOrErr) {
515      consumeError(ObjOrErr.takeError());
516      return createStringError(
517          std::errc::invalid_argument,
518          "slice for '%s' of the universal Mach-O binary "
519          "'%s' is not a Mach-O object or an archive",
520          O.getArchFlagName().c_str(),
521          Config.getCommonConfig().InputFilename.str().c_str());
522    }
523    std::string ArchFlagName = O.getArchFlagName();
524
525    SmallVector<char, 0> Buffer;
526    raw_svector_ostream MemStream(Buffer);
527
528    Expected<const MachOConfig &> MachO = Config.getMachOConfig();
529    if (!MachO)
530      return MachO.takeError();
531
532    if (Error E = executeObjcopyOnBinary(Config.getCommonConfig(), *MachO,
533                                         **ObjOrErr, MemStream))
534      return E;
535
536    auto MB = std::make_unique<SmallVectorMemoryBuffer>(
537        std::move(Buffer), ArchFlagName, /*RequiresNullTerminator=*/false);
538    Expected<std::unique_ptr<Binary>> BinaryOrErr = object::createBinary(*MB);
539    if (!BinaryOrErr)
540      return BinaryOrErr.takeError();
541    Binaries.emplace_back(std::move(*BinaryOrErr), std::move(MB));
542    Slices.emplace_back(*cast<MachOObjectFile>(Binaries.back().getBinary()),
543                        O.getAlign());
544  }
545
546  if (Error Err = writeUniversalBinaryToStream(Slices, Out))
547    return Err;
548
549  return Error::success();
550}
551