1//===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_ELF_INPUT_FILES_H
10#define LLD_ELF_INPUT_FILES_H
11
12#include "Config.h"
13#include "lld/Common/ErrorHandler.h"
14#include "lld/Common/LLVM.h"
15#include "lld/Common/Reproduce.h"
16#include "llvm/ADT/CachedHashString.h"
17#include "llvm/ADT/DenseSet.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/IR/Comdat.h"
20#include "llvm/Object/Archive.h"
21#include "llvm/Object/ELF.h"
22#include "llvm/Object/IRObjectFile.h"
23#include "llvm/Support/Threading.h"
24#include <map>
25
// Forward declarations of LLVM types that this header only names as template
// arguments (llvm::Optional / std::unique_ptr), so their full definitions are
// not needed here.
namespace llvm {
struct DILineInfo;
class TarWriter;
namespace lto {
class InputFile;
}
} // namespace llvm
33
34namespace lld {
// Forward declaration; ObjFile keeps a DWARFCache pointer as a member.
class DWARFCache;

// Returns a printable name for the given input file, in one of the forms
// "<internal>", "foo.a(bar.o)" or "baz.o".
// InputFile::toStringCache is reserved as the cache for this function.
std::string toString(const elf::InputFile *f);
39
40namespace elf {
// Forward declarations of types that are referenced below before (or
// without) being defined in this header.
class InputFile;
class InputSectionBase;

// Lets this header spell llvm::object::Archive as plain `Archive`.
using llvm::object::Archive;

class Symbol;
47
// If -reproduce option is given, all input files are written
// to this tar archive.
extern std::unique_ptr<llvm::TarWriter> tar;

// Opens a given file and returns a reference to its contents.
// NOTE(review): presumably the returned Optional is empty when the file
// cannot be read -- confirm against the definition.
llvm::Optional<MemoryBufferRef> readFile(StringRef path);

// Add symbols in File to the symbol table.
void parseFile(InputFile *file);
57
58// The root class of input files.
59class InputFile {
60public:
61  enum Kind {
62    ObjKind,
63    SharedKind,
64    LazyObjKind,
65    ArchiveKind,
66    BitcodeKind,
67    BinaryKind,
68  };
69
70  Kind kind() const { return fileKind; }
71
72  bool isElf() const {
73    Kind k = kind();
74    return k == ObjKind || k == SharedKind;
75  }
76
77  StringRef getName() const { return mb.getBufferIdentifier(); }
78  MemoryBufferRef mb;
79
80  // Returns sections. It is a runtime error to call this function
81  // on files that don't have the notion of sections.
82  ArrayRef<InputSectionBase *> getSections() const {
83    assert(fileKind == ObjKind || fileKind == BinaryKind);
84    return sections;
85  }
86
87  // Returns object file symbols. It is a runtime error to call this
88  // function on files of other types.
89  ArrayRef<Symbol *> getSymbols() { return getMutableSymbols(); }
90
91  MutableArrayRef<Symbol *> getMutableSymbols() {
92    assert(fileKind == BinaryKind || fileKind == ObjKind ||
93           fileKind == BitcodeKind);
94    return symbols;
95  }
96
97  // Filename of .a which contained this file. If this file was
98  // not in an archive file, it is the empty string. We use this
99  // string for creating error messages.
100  std::string archiveName;
101
102  // If this is an architecture-specific file, the following members
103  // have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type.
104  ELFKind ekind = ELFNoneKind;
105  uint16_t emachine = llvm::ELF::EM_NONE;
106  uint8_t osabi = 0;
107  uint8_t abiVersion = 0;
108
109  // Cache for toString(). Only toString() should use this member.
110  mutable std::string toStringCache;
111
112  std::string getSrcMsg(const Symbol &sym, InputSectionBase &sec,
113                        uint64_t offset);
114
115  // True if this is an argument for --just-symbols. Usually false.
116  bool justSymbols = false;
117
118  // outSecOff of .got2 in the current file. This is used by PPC32 -fPIC/-fPIE
119  // to compute offsets in PLT call stubs.
120  uint32_t ppc32Got2OutSecOff = 0;
121
122  // On PPC64 we need to keep track of which files contain small code model
123  // relocations that access the .toc section. To minimize the chance of a
124  // relocation overflow, files that do contain said relocations should have
125  // their .toc sections sorted closer to the .got section than files that do
126  // not contain any small code model relocations. Thats because the toc-pointer
127  // is defined to point at .got + 0x8000 and the instructions used with small
128  // code model relocations support immediates in the range [-0x8000, 0x7FFC],
129  // making the addressable range relative to the toc pointer
130  // [.got, .got + 0xFFFC].
131  bool ppc64SmallCodeModelTocRelocs = false;
132
133  // groupId is used for --warn-backrefs which is an optional error
134  // checking feature. All files within the same --{start,end}-group or
135  // --{start,end}-lib get the same group ID. Otherwise, each file gets a new
136  // group ID. For more info, see checkDependency() in SymbolTable.cpp.
137  uint32_t groupId;
138  static bool isInGroup;
139  static uint32_t nextGroupId;
140
141  // Index of MIPS GOT built for this file.
142  llvm::Optional<size_t> mipsGotIndex;
143
144  std::vector<Symbol *> symbols;
145
146protected:
147  InputFile(Kind k, MemoryBufferRef m);
148  std::vector<InputSectionBase *> sections;
149
150private:
151  const Kind fileKind;
152};
153
154class ELFFileBase : public InputFile {
155public:
156  ELFFileBase(Kind k, MemoryBufferRef m);
157  static bool classof(const InputFile *f) { return f->isElf(); }
158
159  template <typename ELFT> llvm::object::ELFFile<ELFT> getObj() const {
160    return check(llvm::object::ELFFile<ELFT>::create(mb.getBuffer()));
161  }
162
163  StringRef getStringTable() const { return stringTable; }
164
165  template <typename ELFT> typename ELFT::SymRange getELFSyms() const {
166    return typename ELFT::SymRange(
167        reinterpret_cast<const typename ELFT::Sym *>(elfSyms), numELFSyms);
168  }
169  template <typename ELFT> typename ELFT::SymRange getGlobalELFSyms() const {
170    return getELFSyms<ELFT>().slice(firstGlobal);
171  }
172
173protected:
174  // Initializes this class's member variables.
175  template <typename ELFT> void init();
176
177  const void *elfSyms = nullptr;
178  size_t numELFSyms = 0;
179  uint32_t firstGlobal = 0;
180  StringRef stringTable;
181};
182
183// .o file.
184template <class ELFT> class ObjFile : public ELFFileBase {
185  using Elf_Rel = typename ELFT::Rel;
186  using Elf_Rela = typename ELFT::Rela;
187  using Elf_Sym = typename ELFT::Sym;
188  using Elf_Shdr = typename ELFT::Shdr;
189  using Elf_Word = typename ELFT::Word;
190  using Elf_CGProfile = typename ELFT::CGProfile;
191
192public:
193  static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
194
195  llvm::object::ELFFile<ELFT> getObj() const {
196    return this->ELFFileBase::getObj<ELFT>();
197  }
198
199  ArrayRef<Symbol *> getLocalSymbols();
200  ArrayRef<Symbol *> getGlobalSymbols();
201
202  ObjFile(MemoryBufferRef m, StringRef archiveName) : ELFFileBase(ObjKind, m) {
203    this->archiveName = archiveName;
204  }
205
206  void parse(bool ignoreComdats = false);
207
208  StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> sections,
209                                 const Elf_Shdr &sec);
210
211  Symbol &getSymbol(uint32_t symbolIndex) const {
212    if (symbolIndex >= this->symbols.size())
213      fatal(toString(this) + ": invalid symbol index");
214    return *this->symbols[symbolIndex];
215  }
216
217  uint32_t getSectionIndex(const Elf_Sym &sym) const;
218
219  template <typename RelT> Symbol &getRelocTargetSym(const RelT &rel) const {
220    uint32_t symIndex = rel.getSymbol(config->isMips64EL);
221    return getSymbol(symIndex);
222  }
223
224  llvm::Optional<llvm::DILineInfo> getDILineInfo(InputSectionBase *, uint64_t);
225  llvm::Optional<std::pair<std::string, unsigned>> getVariableLoc(StringRef name);
226
227  // MIPS GP0 value defined by this file. This value represents the gp value
228  // used to create the relocatable object and required to support
229  // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations.
230  uint32_t mipsGp0 = 0;
231
232  uint32_t andFeatures = 0;
233
234  // Name of source file obtained from STT_FILE symbol value,
235  // or empty string if there is no such symbol in object file
236  // symbol table.
237  StringRef sourceFile;
238
239  // True if the file defines functions compiled with
240  // -fsplit-stack. Usually false.
241  bool splitStack = false;
242
243  // True if the file defines functions compiled with -fsplit-stack,
244  // but had one or more functions with the no_split_stack attribute.
245  bool someNoSplitStack = false;
246
247  // Pointer to this input file's .llvm_addrsig section, if it has one.
248  const Elf_Shdr *addrsigSec = nullptr;
249
250  // SHT_LLVM_CALL_GRAPH_PROFILE table
251  ArrayRef<Elf_CGProfile> cgProfile;
252
253private:
254  void initializeSections(bool ignoreComdats);
255  void initializeSymbols();
256  void initializeJustSymbols();
257  void initializeDwarf();
258  InputSectionBase *getRelocTarget(const Elf_Shdr &sec);
259  InputSectionBase *createInputSection(const Elf_Shdr &sec);
260  StringRef getSectionName(const Elf_Shdr &sec);
261
262  bool shouldMerge(const Elf_Shdr &sec, StringRef name);
263
264  // Each ELF symbol contains a section index which the symbol belongs to.
265  // However, because the number of bits dedicated for that is limited, a
266  // symbol can directly point to a section only when the section index is
267  // equal to or smaller than 65280.
268  //
269  // If an object file contains more than 65280 sections, the file must
270  // contain .symtab_shndx section. The section contains an array of
271  // 32-bit integers whose size is the same as the number of symbols.
272  // Nth symbol's section index is in the Nth entry of .symtab_shndx.
273  //
274  // The following variable contains the contents of .symtab_shndx.
275  // If the section does not exist (which is common), the array is empty.
276  ArrayRef<Elf_Word> shndxTable;
277
278  // .shstrtab contents.
279  StringRef sectionStringTable;
280
281  // Debugging information to retrieve source file and line for error
282  // reporting. Linker may find reasonable number of errors in a
283  // single object file, so we cache debugging information in order to
284  // parse it only once for each object file we link.
285  DWARFCache *dwarf;
286  llvm::once_flag initDwarfLine;
287};
288
289// LazyObjFile is analogous to ArchiveFile in the sense that
290// the file contains lazy symbols. The difference is that
291// LazyObjFile wraps a single file instead of multiple files.
292//
293// This class is used for --start-lib and --end-lib options which
294// instruct the linker to link object files between them with the
295// archive file semantics.
296class LazyObjFile : public InputFile {
297public:
298  LazyObjFile(MemoryBufferRef m, StringRef archiveName,
299              uint64_t offsetInArchive)
300      : InputFile(LazyObjKind, m), offsetInArchive(offsetInArchive) {
301    this->archiveName = archiveName;
302  }
303
304  static bool classof(const InputFile *f) { return f->kind() == LazyObjKind; }
305
306  template <class ELFT> void parse();
307  void fetch();
308
309private:
310  uint64_t offsetInArchive;
311};
312
// An ArchiveFile object represents a .a file.
class ArchiveFile : public InputFile {
public:
  // Takes ownership of the parsed archive passed in as `file`.
  explicit ArchiveFile(std::unique_ptr<Archive> &&file);

  // LLVM-style RTTI hook: accepts files tagged ArchiveKind.
  static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
  void parse();

  // Pulls out an object file that contains a definition for Sym and
  // returns it. If the same file was instantiated before, this
  // function does nothing (so we don't instantiate the same file
  // more than once.)
  void fetch(const Archive::Symbol &sym);

private:
  // The underlying llvm::object::Archive, owned by this object.
  std::unique_ptr<Archive> file;

  // NOTE(review): presumably records which members fetch() has already
  // instantiated (keyed by a 64-bit value such as the member offset) --
  // confirm against the definition of fetch().
  llvm::DenseSet<uint64_t> seen;
};
330
// An input file tagged BitcodeKind. It owns an llvm::lto::InputFile in
// `obj`.
class BitcodeFile : public InputFile {
public:
  // Takes the same (buffer, archiveName, offsetInArchive) triple as
  // createObjectFile().
  BitcodeFile(MemoryBufferRef m, StringRef archiveName,
              uint64_t offsetInArchive);

  // LLVM-style RTTI hook: accepts files tagged BitcodeKind.
  static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
  template <class ELFT> void parse();

  // The LTO-level representation of this file, owned by this object.
  std::unique_ptr<llvm::lto::InputFile> obj;
};
339
340// .so file.
341class SharedFile : public ELFFileBase {
342public:
343  SharedFile(MemoryBufferRef m, StringRef defaultSoName)
344      : ELFFileBase(SharedKind, m), soName(defaultSoName),
345        isNeeded(!config->asNeeded) {}
346
347  // This is actually a vector of Elf_Verdef pointers.
348  std::vector<const void *> verdefs;
349
350  // If the output file needs Elf_Verneed data structures for this file, this is
351  // a vector of Elf_Vernaux version identifiers that map onto the entries in
352  // Verdefs, otherwise it is empty.
353  std::vector<unsigned> vernauxs;
354
355  static unsigned vernauxNum;
356
357  std::vector<StringRef> dtNeeded;
358  std::string soName;
359
360  static bool classof(const InputFile *f) { return f->kind() == SharedKind; }
361
362  template <typename ELFT> void parse();
363
364  // Used for --no-allow-shlib-undefined.
365  bool allNeededIsKnown;
366
367  // Used for --as-needed
368  bool isNeeded;
369};
370
// An input file tagged BinaryKind. InputFile's accessors allow both
// getSections() and getSymbols() on files of this kind.
class BinaryFile : public InputFile {
public:
  explicit BinaryFile(MemoryBufferRef m) : InputFile(BinaryKind, m) {}

  // LLVM-style RTTI hook: accepts files tagged BinaryKind.
  static bool classof(const InputFile *f) { return f->kind() == BinaryKind; }
  void parse();
};
377
// Creates an InputFile for the buffer `mb`. `archiveName` and
// `offsetInArchive` default to "" and 0, which callers can rely on when the
// buffer does not come from an archive.
// NOTE(review): which concrete subclass is returned is decided in the
// definition, which is not part of this header -- confirm there.
InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName = "",
                            uint64_t offsetInArchive = 0);
380
381inline bool isBitcode(MemoryBufferRef mb) {
382  return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
383}
384
// Returns `path` with its ThinLTO suffix replaced.
// NOTE(review): the suffix pair applied is chosen in the definition
// (presumably from the linker configuration) -- confirm there.
std::string replaceThinLTOSuffix(StringRef path);

// Global lists of the input files seen so far, one list per file type.
// Note that objectFiles holds plain InputFile pointers rather than a
// specific subclass.
extern std::vector<BinaryFile *> binaryFiles;
extern std::vector<BitcodeFile *> bitcodeFiles;
extern std::vector<LazyObjFile *> lazyObjFiles;
extern std::vector<InputFile *> objectFiles;
extern std::vector<SharedFile *> sharedFiles;
392
393} // namespace elf
394} // namespace lld
395
396#endif
397