1//===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_MACHO_INPUT_FILES_H
10#define LLD_MACHO_INPUT_FILES_H
11
12#include "MachOStructs.h"
13#include "Target.h"
14
15#include "lld/Common/DWARF.h"
16#include "lld/Common/LLVM.h"
17#include "lld/Common/Memory.h"
18#include "llvm/ADT/CachedHashString.h"
19#include "llvm/ADT/DenseSet.h"
20#include "llvm/ADT/SetVector.h"
21#include "llvm/BinaryFormat/MachO.h"
22#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23#include "llvm/Object/Archive.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include "llvm/Support/Threading.h"
26#include "llvm/TextAPI/TextAPIReader.h"
27
28#include <vector>
29
30namespace llvm {
31namespace lto {
32class InputFile;
33} // namespace lto
34namespace MachO {
35class InterfaceFile;
36} // namespace MachO
37class TarWriter;
38} // namespace llvm
39
40namespace lld {
41namespace macho {
42
43struct PlatformInfo;
44class ConcatInputSection;
45class Symbol;
46class Defined;
47class AliasSymbol;
48struct Reloc;
49enum class RefState : uint8_t;
50
51// If --reproduce option is given, all input files are written
52// to this tar archive.
53extern std::unique_ptr<llvm::TarWriter> tar;
54
55// If .subsections_via_symbols is set, each InputSection will be split along
56// symbol boundaries. The field offset represents the offset of the subsection
57// from the start of the original pre-split InputSection.
58struct Subsection {
59  uint64_t offset = 0;
60  InputSection *isec = nullptr;
61};
62
63using Subsections = std::vector<Subsection>;
64class InputFile;
65
66class Section {
67public:
68  InputFile *file;
69  StringRef segname;
70  StringRef name;
71  uint32_t flags;
72  uint64_t addr;
73  Subsections subsections;
74
75  Section(InputFile *file, StringRef segname, StringRef name, uint32_t flags,
76          uint64_t addr)
77      : file(file), segname(segname), name(name), flags(flags), addr(addr) {}
78  // Ensure pointers to Sections are never invalidated.
79  Section(const Section &) = delete;
80  Section &operator=(const Section &) = delete;
81  Section(Section &&) = delete;
82  Section &operator=(Section &&) = delete;
83
84private:
85  // Whether we have already split this section into individual subsections.
86  // For sections that cannot be split (e.g. literal sections), this is always
87  // false.
88  bool doneSplitting = false;
89  friend class ObjFile;
90};
91
// Represents a call graph profile edge, directed from a caller to a callee.
struct CallGraphEntry {
  // The index of the caller in the symbol table.
  uint32_t fromIndex;
  // The index of the callee in the symbol table.
  uint32_t toIndex;
  // Number of calls from caller to callee in the profile.
  uint64_t count;

  // Stores the three values as given; no validation is performed.
  CallGraphEntry(uint32_t fromIndex, uint32_t toIndex, uint64_t count)
      : fromIndex(fromIndex), toIndex(toIndex), count(count) {}
};
104
105class InputFile {
106public:
107  enum Kind {
108    ObjKind,
109    OpaqueKind,
110    DylibKind,
111    ArchiveKind,
112    BitcodeKind,
113  };
114
115  virtual ~InputFile() = default;
116  Kind kind() const { return fileKind; }
117  StringRef getName() const { return name; }
118  static void resetIdCount() { idCount = 0; }
119
120  MemoryBufferRef mb;
121
122  std::vector<Symbol *> symbols;
123  std::vector<Section *> sections;
124  ArrayRef<uint8_t> objCImageInfo;
125
126  // If not empty, this stores the name of the archive containing this file.
127  // We use this string for creating error messages.
128  std::string archiveName;
129
130  // Provides an easy way to sort InputFiles deterministically.
131  const int id;
132
133  // True if this is a lazy ObjFile or BitcodeFile.
134  bool lazy = false;
135
136protected:
137  InputFile(Kind kind, MemoryBufferRef mb, bool lazy = false)
138      : mb(mb), id(idCount++), lazy(lazy), fileKind(kind),
139        name(mb.getBufferIdentifier()) {}
140
141  InputFile(Kind, const llvm::MachO::InterfaceFile &);
142
143private:
144  const Kind fileKind;
145  const StringRef name;
146
147  static int idCount;
148};
149
150struct FDE {
151  uint32_t funcLength;
152  Symbol *personality;
153  InputSection *lsda;
154};
155
156// .o file
157class ObjFile final : public InputFile {
158public:
159  ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
160          bool lazy = false, bool forceHidden = false);
161  ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
162  ArrayRef<uint8_t> getOptimizationHints() const;
163  template <class LP> void parse();
164
165  static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
166
167  std::string sourceFile() const;
168  // Parses line table information for diagnostics. compileUnit should be used
169  // for other purposes.
170  lld::DWARFCache *getDwarf();
171
172  llvm::DWARFUnit *compileUnit = nullptr;
173  std::unique_ptr<lld::DWARFCache> dwarfCache;
174  Section *addrSigSection = nullptr;
175  const uint32_t modTime;
176  bool forceHidden;
177  std::vector<ConcatInputSection *> debugSections;
178  std::vector<CallGraphEntry> callGraph;
179  llvm::DenseMap<ConcatInputSection *, FDE> fdes;
180  std::vector<AliasSymbol *> aliases;
181
182private:
183  llvm::once_flag initDwarf;
184  template <class LP> void parseLazy();
185  template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>);
186  template <class LP>
187  void parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
188                    ArrayRef<typename LP::nlist> nList, const char *strtab,
189                    bool subsectionsViaSymbols);
190  template <class NList>
191  Symbol *parseNonSectionSymbol(const NList &sym, const char *strtab);
192  template <class SectionHeader>
193  void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
194                        const SectionHeader &, Section &);
195  void parseDebugInfo();
196  void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection);
197  void registerCompactUnwind(Section &compactUnwindSection);
198  void registerEhFrames(Section &ehFrameSection);
199};
200
201// command-line -sectcreate file
202class OpaqueFile final : public InputFile {
203public:
204  OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName);
205  static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; }
206};
207
208// .dylib or .tbd file
209class DylibFile final : public InputFile {
210public:
211  // Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the
212  // symbols in those sub-libraries will be available under the umbrella
213  // library's namespace. Those sub-libraries can also have their own
214  // re-exports. When loading a re-exported dylib, `umbrella` should be set to
215  // the root dylib to ensure symbols in the child library are correctly bound
216  // to the root. On the other hand, if a dylib is being directly loaded
217  // (through an -lfoo flag), then `umbrella` should be a nullptr.
218  explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
219                     bool isBundleLoader, bool explicitlyLinked);
220  explicit DylibFile(const llvm::MachO::InterfaceFile &interface,
221                     DylibFile *umbrella, bool isBundleLoader,
222                     bool explicitlyLinked);
223  explicit DylibFile(DylibFile *umbrella);
224
225  void parseLoadCommands(MemoryBufferRef mb);
226  void parseReexports(const llvm::MachO::InterfaceFile &interface);
227  bool isReferenced() const { return numReferencedSymbols > 0; }
228  bool isExplicitlyLinked() const;
229  void setExplicitlyLinked() { explicitlyLinked = true; }
230
231  static bool classof(const InputFile *f) { return f->kind() == DylibKind; }
232
233  StringRef installName;
234  DylibFile *exportingFile = nullptr;
235  DylibFile *umbrella;
236  SmallVector<StringRef, 2> rpaths;
237  uint32_t compatibilityVersion = 0;
238  uint32_t currentVersion = 0;
239  int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel
240  unsigned numReferencedSymbols = 0;
241  RefState refState;
242  bool reexport = false;
243  bool forceNeeded = false;
244  bool forceWeakImport = false;
245  bool deadStrippable = false;
246
247private:
248  bool explicitlyLinked = false; // Access via isExplicitlyLinked().
249
250public:
251  // An executable can be used as a bundle loader that will load the output
252  // file being linked, and that contains symbols referenced, but not
253  // implemented in the bundle. When used like this, it is very similar
254  // to a dylib, so we've used the same class to represent it.
255  bool isBundleLoader;
256
257  // Synthetic Dylib objects created by $ld$previous symbols in this dylib.
258  // Usually empty. These synthetic dylibs won't have synthetic dylibs
259  // themselves.
260  SmallVector<DylibFile *, 2> extraDylibs;
261
262private:
263  DylibFile *getSyntheticDylib(StringRef installName, uint32_t currentVersion,
264                               uint32_t compatVersion);
265
266  bool handleLDSymbol(StringRef originalName);
267  void handleLDPreviousSymbol(StringRef name, StringRef originalName);
268  void handleLDInstallNameSymbol(StringRef name, StringRef originalName);
269  void handleLDHideSymbol(StringRef name, StringRef originalName);
270  void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const;
271  void parseExportedSymbols(uint32_t offset, uint32_t size);
272  void loadReexport(StringRef path, DylibFile *umbrella,
273                    const llvm::MachO::InterfaceFile *currentTopLevelTapi);
274
275  llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols;
276};
277
278// .a file
279class ArchiveFile final : public InputFile {
280public:
281  explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file,
282                       bool forceHidden);
283  void addLazySymbols();
284  void fetch(const llvm::object::Archive::Symbol &);
285  // LLD normally doesn't use Error for error-handling, but the underlying
286  // Archive library does, so this is the cleanest way to wrap it.
287  Error fetch(const llvm::object::Archive::Child &, StringRef reason);
288  const llvm::object::Archive &getArchive() const { return *file; };
289  static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
290
291private:
292  std::unique_ptr<llvm::object::Archive> file;
293  // Keep track of children fetched from the archive by tracking
294  // which address offsets have been fetched already.
295  llvm::DenseSet<uint64_t> seen;
296  // Load all symbols with hidden visibility (-load_hidden).
297  bool forceHidden;
298};
299
300class BitcodeFile final : public InputFile {
301public:
302  explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
303                       uint64_t offsetInArchive, bool lazy = false,
304                       bool forceHidden = false);
305  static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
306  void parse();
307
308  std::unique_ptr<llvm::lto::InputFile> obj;
309  bool forceHidden;
310
311private:
312  void parseLazy();
313};
314
315extern llvm::SetVector<InputFile *> inputFiles;
316extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads;
317
318std::optional<MemoryBufferRef> readFile(StringRef path);
319
320void extract(InputFile &file, StringRef reason);
321
322namespace detail {
323
324template <class CommandType, class... Types>
325std::vector<const CommandType *>
326findCommands(const void *anyHdr, size_t maxCommands, Types... types) {
327  std::vector<const CommandType *> cmds;
328  std::initializer_list<uint32_t> typesList{types...};
329  const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr);
330  const uint8_t *p =
331      reinterpret_cast<const uint8_t *>(hdr) + target->headerSize;
332  for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
333    auto *cmd = reinterpret_cast<const CommandType *>(p);
334    if (llvm::is_contained(typesList, cmd->cmd)) {
335      cmds.push_back(cmd);
336      if (cmds.size() == maxCommands)
337        return cmds;
338    }
339    p += cmd->cmdsize;
340  }
341  return cmds;
342}
343
344} // namespace detail
345
346// anyHdr should be a pointer to either mach_header or mach_header_64
347template <class CommandType = llvm::MachO::load_command, class... Types>
348const CommandType *findCommand(const void *anyHdr, Types... types) {
349  std::vector<const CommandType *> cmds =
350      detail::findCommands<CommandType>(anyHdr, 1, types...);
351  return cmds.size() ? cmds[0] : nullptr;
352}
353
354template <class CommandType = llvm::MachO::load_command, class... Types>
355std::vector<const CommandType *> findCommands(const void *anyHdr,
356                                              Types... types) {
357  return detail::findCommands<CommandType>(anyHdr, 0, types...);
358}
359
360std::string replaceThinLTOSuffix(StringRef path);
361} // namespace macho
362
363std::string toString(const macho::InputFile *file);
364std::string toString(const macho::Section &);
365} // namespace lld
366
367#endif
368