1//===- Symbols.h ------------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines various types of Symbols.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLD_ELF_SYMBOLS_H
14#define LLD_ELF_SYMBOLS_H
15
16#include "InputFiles.h"
17#include "InputSection.h"
18#include "lld/Common/LLVM.h"
19#include "lld/Common/Strings.h"
20#include "llvm/ADT/DenseMap.h"
21#include "llvm/Object/Archive.h"
22#include "llvm/Object/ELF.h"
23
24namespace lld {
// Returns a string representation of a symbol, for use in diagnostics.
std::string toString(const elf::Symbol &);

// There are two different ways to convert an Archive::Symbol to a string:
// one for Microsoft name mangling and one for Itanium name mangling.
// Call toCOFFString or toELFString explicitly rather than a plain toString.
// NOTE(review): toCOFFString is not declared in the visible part of this
// header.
std::string toELFString(const llvm::object::Archive::Symbol &);
32
33namespace elf {
34class CommonSymbol;
35class Defined;
36class InputFile;
37class LazyArchive;
38class LazyObject;
39class SharedSymbol;
40class Symbol;
41class Undefined;
42
43// This is a StringRef-like container that doesn't run strlen().
44//
45// ELF string tables contain a lot of null-terminated strings. Most of them
46// are not necessary for the linker because they are names of local symbols,
47// and the linker doesn't use local symbol names for name resolution. So, we
48// use this class to represents strings read from string tables.
49struct StringRefZ {
50  StringRefZ(const char *s) : data(s), size(-1) {}
51  StringRefZ(StringRef s) : data(s.data()), size(s.size()) {}
52
53  const char *data;
54  const uint32_t size;
55};
56
57// The base class for real symbol classes.
58class Symbol {
59public:
60  enum Kind {
61    PlaceholderKind,
62    DefinedKind,
63    CommonKind,
64    SharedKind,
65    UndefinedKind,
66    LazyArchiveKind,
67    LazyObjectKind,
68  };
69
70  Kind kind() const { return static_cast<Kind>(symbolKind); }
71
72  // The file from which this symbol was created.
73  InputFile *file;
74
75protected:
76  const char *nameData;
77  mutable uint32_t nameSize;
78
79public:
80  uint32_t dynsymIndex = 0;
81  uint32_t gotIndex = -1;
82  uint32_t pltIndex = -1;
83
84  uint32_t globalDynIndex = -1;
85
86  // This field is a index to the symbol's version definition.
87  uint32_t verdefIndex = -1;
88
89  // Version definition index.
90  uint16_t versionId;
91
92  // Symbol binding. This is not overwritten by replace() to track
93  // changes during resolution. In particular:
94  //  - An undefined weak is still weak when it resolves to a shared library.
95  //  - An undefined weak will not fetch archive members, but we have to
96  //    remember it is weak.
97  uint8_t binding;
98
99  // The following fields have the same meaning as the ELF symbol attributes.
100  uint8_t type;    // symbol type
101  uint8_t stOther; // st_other field value
102
103  uint8_t symbolKind;
104
105  // Symbol visibility. This is the computed minimum visibility of all
106  // observed non-DSO symbols.
107  uint8_t visibility : 2;
108
109  // True if the symbol was used for linking and thus need to be added to the
110  // output file's symbol table. This is true for all symbols except for
111  // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that
112  // are unreferenced except by other bitcode objects.
113  uint8_t isUsedInRegularObj : 1;
114
115  // Used by a Defined symbol with protected or default visibility, to record
116  // whether it is required to be exported into .dynsym. This is set when any of
117  // the following conditions hold:
118  //
119  // - If there is an interposable symbol from a DSO.
120  // - If -shared or --export-dynamic is specified, any symbol in an object
121  //   file/bitcode sets this property, unless suppressed by LTO
122  //   canBeOmittedFromSymbolTable().
123  uint8_t exportDynamic : 1;
124
125  // True if the symbol is in the --dynamic-list file. A Defined symbol with
126  // protected or default visibility with this property is required to be
127  // exported into .dynsym.
128  uint8_t inDynamicList : 1;
129
130  // False if LTO shouldn't inline whatever this symbol points to. If a symbol
131  // is overwritten after LTO, LTO shouldn't inline the symbol because it
132  // doesn't know the final contents of the symbol.
133  uint8_t canInline : 1;
134
135  // Used by Undefined and SharedSymbol to track if there has been at least one
136  // undefined reference to the symbol. The binding may change to STB_WEAK if
137  // the first undefined reference from a non-shared object is weak.
138  uint8_t referenced : 1;
139
140  // True if this symbol is specified by --trace-symbol option.
141  uint8_t traced : 1;
142
143  inline void replace(const Symbol &newSym);
144
145  bool includeInDynsym() const;
146  uint8_t computeBinding() const;
147  bool isWeak() const { return binding == llvm::ELF::STB_WEAK; }
148
149  bool isUndefined() const { return symbolKind == UndefinedKind; }
150  bool isCommon() const { return symbolKind == CommonKind; }
151  bool isDefined() const { return symbolKind == DefinedKind; }
152  bool isShared() const { return symbolKind == SharedKind; }
153  bool isPlaceholder() const { return symbolKind == PlaceholderKind; }
154
155  bool isLocal() const { return binding == llvm::ELF::STB_LOCAL; }
156
157  bool isLazy() const {
158    return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind;
159  }
160
161  // True if this is an undefined weak symbol. This only works once
162  // all input files have been added.
163  bool isUndefWeak() const {
164    // See comment on lazy symbols for details.
165    return isWeak() && (isUndefined() || isLazy());
166  }
167
168  StringRef getName() const {
169    if (nameSize == (uint32_t)-1)
170      nameSize = strlen(nameData);
171    return {nameData, nameSize};
172  }
173
174  void setName(StringRef s) {
175    nameData = s.data();
176    nameSize = s.size();
177  }
178
179  void parseSymbolVersion();
180
181  bool isInGot() const { return gotIndex != -1U; }
182  bool isInPlt() const { return pltIndex != -1U; }
183
184  uint64_t getVA(int64_t addend = 0) const;
185
186  uint64_t getGotOffset() const;
187  uint64_t getGotVA() const;
188  uint64_t getGotPltOffset() const;
189  uint64_t getGotPltVA() const;
190  uint64_t getPltVA() const;
191  uint64_t getSize() const;
192  OutputSection *getOutputSection() const;
193
194  // The following two functions are used for symbol resolution.
195  //
196  // You are expected to call mergeProperties for all symbols in input
197  // files so that attributes that are attached to names rather than
198  // indivisual symbol (such as visibility) are merged together.
199  //
200  // Every time you read a new symbol from an input, you are supposed
201  // to call resolve() with the new symbol. That function replaces
202  // "this" object as a result of name resolution if the new symbol is
203  // more appropriate to be included in the output.
204  //
205  // For example, if "this" is an undefined symbol and a new symbol is
206  // a defined symbol, "this" is replaced with the new symbol.
207  void mergeProperties(const Symbol &other);
208  void resolve(const Symbol &other);
209
210  // If this is a lazy symbol, fetch an input file and add the symbol
211  // in the file to the symbol table. Calling this function on
212  // non-lazy object causes a runtime error.
213  void fetch() const;
214
215private:
216  static bool isExportDynamic(Kind k, uint8_t visibility) {
217    if (k == SharedKind)
218      return visibility == llvm::ELF::STV_DEFAULT;
219    return config->shared || config->exportDynamic;
220  }
221
222  void resolveUndefined(const Undefined &other);
223  void resolveCommon(const CommonSymbol &other);
224  void resolveDefined(const Defined &other);
225  template <class LazyT> void resolveLazy(const LazyT &other);
226  void resolveShared(const SharedSymbol &other);
227
228  int compare(const Symbol *other) const;
229
230  inline size_t getSymbolSize() const;
231
232protected:
233  Symbol(Kind k, InputFile *file, StringRefZ name, uint8_t binding,
234         uint8_t stOther, uint8_t type)
235      : file(file), nameData(name.data), nameSize(name.size), binding(binding),
236        type(type), stOther(stOther), symbolKind(k), visibility(stOther & 3),
237        isUsedInRegularObj(!file || file->kind() == InputFile::ObjKind),
238        exportDynamic(isExportDynamic(k, visibility)), inDynamicList(false),
239        canInline(false), referenced(false), traced(false), needsPltAddr(false),
240        isInIplt(false), gotInIgot(false), isPreemptible(false),
241        used(!config->gcSections), needsTocRestore(false),
242        scriptDefined(false) {}
243
244public:
245  // True the symbol should point to its PLT entry.
246  // For SharedSymbol only.
247  uint8_t needsPltAddr : 1;
248
249  // True if this symbol is in the Iplt sub-section of the Plt and the Igot
250  // sub-section of the .got.plt or .got.
251  uint8_t isInIplt : 1;
252
253  // True if this symbol needs a GOT entry and its GOT entry is actually in
254  // Igot. This will be true only for certain non-preemptible ifuncs.
255  uint8_t gotInIgot : 1;
256
257  // True if this symbol is preemptible at load time.
258  uint8_t isPreemptible : 1;
259
260  // True if an undefined or shared symbol is used from a live section.
261  //
262  // NOTE: In Writer.cpp the field is used to mark local defined symbols
263  // which are referenced by relocations when -r or --emit-relocs is given.
264  uint8_t used : 1;
265
266  // True if a call to this symbol needs to be followed by a restore of the
267  // PPC64 toc pointer.
268  uint8_t needsTocRestore : 1;
269
270  // True if this symbol is defined by a linker script.
271  uint8_t scriptDefined : 1;
272
273  // The partition whose dynamic symbol table contains this symbol's definition.
274  uint8_t partition = 1;
275
276  bool isSection() const { return type == llvm::ELF::STT_SECTION; }
277  bool isTls() const { return type == llvm::ELF::STT_TLS; }
278  bool isFunc() const { return type == llvm::ELF::STT_FUNC; }
279  bool isGnuIFunc() const { return type == llvm::ELF::STT_GNU_IFUNC; }
280  bool isObject() const { return type == llvm::ELF::STT_OBJECT; }
281  bool isFile() const { return type == llvm::ELF::STT_FILE; }
282};
283
284// Represents a symbol that is defined in the current output file.
285class Defined : public Symbol {
286public:
287  Defined(InputFile *file, StringRefZ name, uint8_t binding, uint8_t stOther,
288          uint8_t type, uint64_t value, uint64_t size, SectionBase *section)
289      : Symbol(DefinedKind, file, name, binding, stOther, type), value(value),
290        size(size), section(section) {}
291
292  static bool classof(const Symbol *s) { return s->isDefined(); }
293
294  uint64_t value;
295  uint64_t size;
296  SectionBase *section;
297};
298
299// Represents a common symbol.
300//
301// On Unix, it is traditionally allowed to write variable definitions
302// without initialization expressions (such as "int foo;") to header
303// files. Such definition is called "tentative definition".
304//
305// Using tentative definition is usually considered a bad practice
306// because you should write only declarations (such as "extern int
307// foo;") to header files. Nevertheless, the linker and the compiler
308// have to do something to support bad code by allowing duplicate
309// definitions for this particular case.
310//
311// Common symbols represent variable definitions without initializations.
312// The compiler creates common symbols when it sees variable definitions
313// without initialization (you can suppress this behavior and let the
314// compiler create a regular defined symbol by -fno-common).
315//
316// The linker allows common symbols to be replaced by regular defined
317// symbols. If there are remaining common symbols after name resolution is
318// complete, they are converted to regular defined symbols in a .bss
319// section. (Therefore, the later passes don't see any CommonSymbols.)
320class CommonSymbol : public Symbol {
321public:
322  CommonSymbol(InputFile *file, StringRefZ name, uint8_t binding,
323               uint8_t stOther, uint8_t type, uint64_t alignment, uint64_t size)
324      : Symbol(CommonKind, file, name, binding, stOther, type),
325        alignment(alignment), size(size) {}
326
327  static bool classof(const Symbol *s) { return s->isCommon(); }
328
329  uint32_t alignment;
330  uint64_t size;
331};
332
333class Undefined : public Symbol {
334public:
335  Undefined(InputFile *file, StringRefZ name, uint8_t binding, uint8_t stOther,
336            uint8_t type, uint32_t discardedSecIdx = 0)
337      : Symbol(UndefinedKind, file, name, binding, stOther, type),
338        discardedSecIdx(discardedSecIdx) {}
339
340  static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
341
342  // The section index if in a discarded section, 0 otherwise.
343  uint32_t discardedSecIdx;
344};
345
346class SharedSymbol : public Symbol {
347public:
348  static bool classof(const Symbol *s) { return s->kind() == SharedKind; }
349
350  SharedSymbol(InputFile &file, StringRef name, uint8_t binding,
351               uint8_t stOther, uint8_t type, uint64_t value, uint64_t size,
352               uint32_t alignment, uint32_t verdefIndex)
353      : Symbol(SharedKind, &file, name, binding, stOther, type), value(value),
354        size(size), alignment(alignment) {
355    this->verdefIndex = verdefIndex;
356    // GNU ifunc is a mechanism to allow user-supplied functions to
357    // resolve PLT slot values at load-time. This is contrary to the
358    // regular symbol resolution scheme in which symbols are resolved just
359    // by name. Using this hook, you can program how symbols are solved
360    // for you program. For example, you can make "memcpy" to be resolved
361    // to a SSE-enabled version of memcpy only when a machine running the
362    // program supports the SSE instruction set.
363    //
364    // Naturally, such symbols should always be called through their PLT
365    // slots. What GNU ifunc symbols point to are resolver functions, and
366    // calling them directly doesn't make sense (unless you are writing a
367    // loader).
368    //
369    // For DSO symbols, we always call them through PLT slots anyway.
370    // So there's no difference between GNU ifunc and regular function
371    // symbols if they are in DSOs. So we can handle GNU_IFUNC as FUNC.
372    if (this->type == llvm::ELF::STT_GNU_IFUNC)
373      this->type = llvm::ELF::STT_FUNC;
374  }
375
376  SharedFile &getFile() const { return *cast<SharedFile>(file); }
377
378  uint64_t value; // st_value
379  uint64_t size;  // st_size
380  uint32_t alignment;
381};
382
// LazyArchive and LazyObject represent symbols that are not yet in the link,
// but that we know where to find if needed. If the resolver finds both
// Undefined and Lazy for the same name, it will ask the Lazy to load a file.
386//
387// A special complication is the handling of weak undefined symbols. They should
388// not load a file, but we have to remember we have seen both the weak undefined
389// and the lazy. We represent that with a lazy symbol with a weak binding. This
390// means that code looking for undefined symbols normally also has to take lazy
391// symbols into consideration.
392
393// This class represents a symbol defined in an archive file. It is
394// created from an archive file header, and it knows how to load an
395// object file from an archive to replace itself with a defined
396// symbol.
397class LazyArchive : public Symbol {
398public:
399  LazyArchive(InputFile &file, const llvm::object::Archive::Symbol s)
400      : Symbol(LazyArchiveKind, &file, s.getName(), llvm::ELF::STB_GLOBAL,
401               llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE),
402        sym(s) {}
403
404  static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; }
405
406  MemoryBufferRef getMemberBuffer();
407
408  const llvm::object::Archive::Symbol sym;
409};
410
411// LazyObject symbols represents symbols in object files between
412// --start-lib and --end-lib options.
413class LazyObject : public Symbol {
414public:
415  LazyObject(InputFile &file, StringRef name)
416      : Symbol(LazyObjectKind, &file, name, llvm::ELF::STB_GLOBAL,
417               llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE) {}
418
419  static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }
420};
421
// Some linker-generated symbols need to be created as
// Defined symbols. This struct groups static pointers to them; each
// comment below names the symbol(s) the following member(s) stand for.
// Only the declarations live here.
struct ElfSym {
  // __bss_start
  static Defined *bss;

  // etext and _etext
  static Defined *etext1;
  static Defined *etext2;

  // edata and _edata
  static Defined *edata1;
  static Defined *edata2;

  // end and _end
  static Defined *end1;
  static Defined *end2;

  // The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention to
  // be at some offset from the base of the .got section, usually 0 or
  // the end of the .got.
  static Defined *globalOffsetTable;

  // _gp, _gp_disp and __gnu_local_gp symbols. Only for MIPS.
  static Defined *mipsGp;
  static Defined *mipsGpDisp;
  static Defined *mipsLocalGp;

  // __rel{,a}_iplt_{start,end} symbols.
  static Defined *relaIpltStart;
  static Defined *relaIpltEnd;

  // __global_pointer$ for RISC-V.
  static Defined *riscvGlobalPointer;

  // _TLS_MODULE_BASE_ on targets that support TLSDESC.
  static Defined *tlsModuleBase;
};
460
// A buffer class that is large enough to hold any Symbol-derived
// object. We allocate memory using this class and instantiate a symbol
// using the placement new.
//
// Each member is a char array with the size and alignment of one concrete
// symbol class, so the union takes the largest size and the strictest
// alignment among them.
union SymbolUnion {
  alignas(Defined) char a[sizeof(Defined)];
  alignas(CommonSymbol) char b[sizeof(CommonSymbol)];
  alignas(Undefined) char c[sizeof(Undefined)];
  alignas(SharedSymbol) char d[sizeof(SharedSymbol)];
  alignas(LazyArchive) char e[sizeof(LazyArchive)];
  alignas(LazyObject) char f[sizeof(LazyObject)];
};

// It is important to keep the size of SymbolUnion small for performance and
// memory usage reasons. 80 bytes is a soft limit based on the size of Defined
// on a 64-bit system.
static_assert(sizeof(SymbolUnion) <= 80, "SymbolUnion too large");
477
478template <typename T> struct AssertSymbol {
479  static_assert(std::is_trivially_destructible<T>(),
480                "Symbol types must be trivially destructible");
481  static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
482  static_assert(alignof(T) <= alignof(SymbolUnion),
483                "SymbolUnion not aligned enough");
484};
485
486static inline void assertSymbols() {
487  AssertSymbol<Defined>();
488  AssertSymbol<CommonSymbol>();
489  AssertSymbol<Undefined>();
490  AssertSymbol<SharedSymbol>();
491  AssertSymbol<LazyArchive>();
492  AssertSymbol<LazyObject>();
493}
494
// Declared here because the inline Symbol::replace() in this header calls it
// for symbols whose `traced` flag (--trace-symbol) is set.
void printTraceSymbol(const Symbol *sym);
496
497size_t Symbol::getSymbolSize() const {
498  switch (kind()) {
499  case CommonKind:
500    return sizeof(CommonSymbol);
501  case DefinedKind:
502    return sizeof(Defined);
503  case LazyArchiveKind:
504    return sizeof(LazyArchive);
505  case LazyObjectKind:
506    return sizeof(LazyObject);
507  case SharedKind:
508    return sizeof(SharedSymbol);
509  case UndefinedKind:
510    return sizeof(Undefined);
511  case PlaceholderKind:
512    return sizeof(Symbol);
513  }
514  llvm_unreachable("unknown symbol kind");
515}
516
517// replace() replaces "this" object with a given symbol by memcpy'ing
518// it over to "this". This function is called as a result of name
519// resolution, e.g. to replace an undefind symbol with a defined symbol.
520void Symbol::replace(const Symbol &newSym) {
521  using llvm::ELF::STT_TLS;
522
523  // st_value of STT_TLS represents the assigned offset, not the actual address
524  // which is used by STT_FUNC and STT_OBJECT. STT_TLS symbols can only be
525  // referenced by special TLS relocations. It is usually an error if a STT_TLS
526  // symbol is replaced by a non-STT_TLS symbol, vice versa. There are two
527  // exceptions: (a) a STT_NOTYPE lazy/undefined symbol can be replaced by a
528  // STT_TLS symbol, (b) a STT_TLS undefined symbol can be replaced by a
529  // STT_NOTYPE lazy symbol.
530  if (symbolKind != PlaceholderKind && !newSym.isLazy() &&
531      (type == STT_TLS) != (newSym.type == STT_TLS) &&
532      type != llvm::ELF::STT_NOTYPE)
533    error("TLS attribute mismatch: " + toString(*this) + "\n>>> defined in " +
534          toString(newSym.file) + "\n>>> defined in " + toString(file));
535
536  Symbol old = *this;
537  memcpy(this, &newSym, newSym.getSymbolSize());
538
539  // old may be a placeholder. The referenced fields must be initialized in
540  // SymbolTable::insert.
541  versionId = old.versionId;
542  visibility = old.visibility;
543  isUsedInRegularObj = old.isUsedInRegularObj;
544  exportDynamic = old.exportDynamic;
545  inDynamicList = old.inDynamicList;
546  canInline = old.canInline;
547  referenced = old.referenced;
548  traced = old.traced;
549  isPreemptible = old.isPreemptible;
550  scriptDefined = old.scriptDefined;
551  partition = old.partition;
552
553  // Symbol length is computed lazily. If we already know a symbol length,
554  // propagate it.
555  if (nameData == old.nameData && nameSize == 0 && old.nameSize != 0)
556    nameSize = old.nameSize;
557
558  // Print out a log message if --trace-symbol was specified.
559  // This is for debugging.
560  if (traced)
561    printTraceSymbol(this);
562}
563
// Helpers that are only declared in this header.
// NOTE(review): their behavior is not visible from here; consult the
// definitions rather than relying on the names alone.
void maybeWarnUnorderableSymbol(const Symbol *sym);
bool computeIsPreemptible(const Symbol &sym);
void reportBackrefs();

// A mapping from a symbol to an InputFile referencing it backward. Used by
// --warn-backrefs.
extern llvm::DenseMap<const Symbol *, const InputFile *> backwardReferences;
571
572} // namespace elf
573} // namespace lld
574
575#endif
576