LinkerScript.h revision 341825
1//===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2//
3//                             The LLVM Linker
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef LLD_ELF_LINKER_SCRIPT_H
11#define LLD_ELF_LINKER_SCRIPT_H
12
#include "Config.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
28
29namespace lld {
30namespace elf {
31
// Forward declarations. This header only needs these as incomplete types.
class Defined;
class InputSection;
class InputSectionBase;
class OutputSection;
class SectionBase;
class Symbol;
39
40// This represents an r-value in the linker script.
41struct ExprValue {
42  ExprValue(SectionBase *Sec, bool ForceAbsolute, uint64_t Val,
43            const Twine &Loc)
44      : Sec(Sec), ForceAbsolute(ForceAbsolute), Val(Val), Loc(Loc.str()) {}
45
46  ExprValue(uint64_t Val) : ExprValue(nullptr, false, Val, "") {}
47
48  bool isAbsolute() const { return ForceAbsolute || Sec == nullptr; }
49  uint64_t getValue() const;
50  uint64_t getSecAddr() const;
51  uint64_t getSectionOffset() const;
52
53  // If a value is relative to a section, it has a non-null Sec.
54  SectionBase *Sec;
55
56  // True if this expression is enclosed in ABSOLUTE().
57  // This flag affects the return value of getValue().
58  bool ForceAbsolute;
59
60  uint64_t Val;
61  uint64_t Alignment = 1;
62
63  // Original source location. Used for error messages.
64  std::string Loc;
65};
66
67// This represents an expression in the linker script.
68// ScriptParser::readExpr reads an expression and returns an Expr.
69// Later, we evaluate the expression by calling the function.
70typedef std::function<ExprValue()> Expr;
71
// Discriminator for the commands that implement the linker script SECTIONS
// command. The numeric values are the implicit ones, spelled out explicitly.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
enum SectionsCommandKind {
  AssignmentKind = 0,    // . = expr or <sym> = expr
  OutputSectionKind = 1,
  InputSectionKind = 2,
  ByteKind = 3           // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
};
80
// Common base of the command structs. Kind is the discriminator that the
// derived types' classof() functions compare against.
struct BaseCommand {
  int Kind;

  BaseCommand(int K) : Kind{K} {}
};
85
86// This represents ". = <expr>" or "<symbol> = <expr>".
87struct SymbolAssignment : BaseCommand {
88  SymbolAssignment(StringRef Name, Expr E, std::string Loc)
89      : BaseCommand(AssignmentKind), Name(Name), Expression(E), Location(Loc) {}
90
91  static bool classof(const BaseCommand *C) {
92    return C->Kind == AssignmentKind;
93  }
94
95  // The LHS of an expression. Name is either a symbol name or ".".
96  StringRef Name;
97  Defined *Sym = nullptr;
98
99  // The RHS of an expression.
100  Expr Expression;
101
102  // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
103  bool Provide = false;
104  bool Hidden = false;
105
106  // Holds file name and line number for error reporting.
107  std::string Location;
108
109  // A string representation of this command. We use this for -Map.
110  std::string CommandString;
111
112  // Address of this assignment command.
113  unsigned Addr;
114
115  // Size of this assignment command. This is usually 0, but if
116  // you move '.' this may be greater than 0.
117  unsigned Size;
118};
119
// Linker scripts allow additional constraints to be put on output sections.
// An output section marked ONLY_IF_RO is created only if its input sections
// are read-only. Likewise, an output section marked ONLY_IF_RW is created
// if all input sections are RW.
enum class ConstraintKind {
  NoConstraint,
  ReadOnly,
  ReadWrite,
};
125
126// This struct is used to represent the location and size of regions of
127// target memory. Instances of the struct are created by parsing the
128// MEMORY command.
129struct MemoryRegion {
130  MemoryRegion(StringRef Name, uint64_t Origin, uint64_t Length, uint32_t Flags,
131               uint32_t NegFlags)
132      : Name(Name), Origin(Origin), Length(Length), Flags(Flags),
133        NegFlags(NegFlags) {}
134
135  std::string Name;
136  uint64_t Origin;
137  uint64_t Length;
138  uint32_t Flags;
139  uint32_t NegFlags;
140  uint64_t CurPos = 0;
141};
142
143// This struct represents one section match pattern in SECTIONS() command.
144// It can optionally have negative match pattern for EXCLUDED_FILE command.
145// Also it may be surrounded with SORT() command, so contains sorting rules.
146struct SectionPattern {
147  SectionPattern(StringMatcher &&Pat1, StringMatcher &&Pat2)
148      : ExcludedFilePat(Pat1), SectionPat(Pat2) {}
149
150  StringMatcher ExcludedFilePat;
151  StringMatcher SectionPat;
152  SortSectionPolicy SortOuter;
153  SortSectionPolicy SortInner;
154};
155
156class ThunkSection;
157struct InputSectionDescription : BaseCommand {
158  InputSectionDescription(StringRef FilePattern)
159      : BaseCommand(InputSectionKind), FilePat(FilePattern) {}
160
161  static bool classof(const BaseCommand *C) {
162    return C->Kind == InputSectionKind;
163  }
164
165  StringMatcher FilePat;
166
167  // Input sections that matches at least one of SectionPatterns
168  // will be associated with this InputSectionDescription.
169  std::vector<SectionPattern> SectionPatterns;
170
171  std::vector<InputSection *> Sections;
172
173  // Temporary record of synthetic ThunkSection instances and the pass that
174  // they were created in. This is used to insert newly created ThunkSections
175  // into Sections at the end of a createThunks() pass.
176  std::vector<std::pair<ThunkSection *, uint32_t>> ThunkSections;
177};
178
179// Represents BYTE(), SHORT(), LONG(), or QUAD().
180struct ByteCommand : BaseCommand {
181  ByteCommand(Expr E, unsigned Size, std::string CommandString)
182      : BaseCommand(ByteKind), CommandString(CommandString), Expression(E),
183        Size(Size) {}
184
185  static bool classof(const BaseCommand *C) { return C->Kind == ByteKind; }
186
187  // Keeps string representing the command. Used for -Map" is perhaps better.
188  std::string CommandString;
189
190  Expr Expression;
191
192  // This is just an offset of this assignment command in the output section.
193  unsigned Offset;
194
195  // Size of this data command.
196  unsigned Size;
197};
198
199struct PhdrsCommand {
200  StringRef Name;
201  unsigned Type = llvm::ELF::PT_NULL;
202  bool HasFilehdr = false;
203  bool HasPhdrs = false;
204  llvm::Optional<unsigned> Flags;
205  Expr LMAExpr = nullptr;
206};
207
208class LinkerScript final {
209  // Temporary state used in processSectionCommands() and assignAddresses()
210  // that must be reinitialized for each call to the above functions, and must
211  // not be used outside of the scope of a call to the above functions.
212  struct AddressState {
213    AddressState();
214    uint64_t ThreadBssOffset = 0;
215    OutputSection *OutSec = nullptr;
216    MemoryRegion *MemRegion = nullptr;
217    MemoryRegion *LMARegion = nullptr;
218    uint64_t LMAOffset = 0;
219  };
220
221  llvm::DenseMap<StringRef, OutputSection *> NameToOutputSection;
222
223  void addSymbol(SymbolAssignment *Cmd);
224  void assignSymbol(SymbolAssignment *Cmd, bool InSec);
225  void setDot(Expr E, const Twine &Loc, bool InSec);
226  void expandOutputSection(uint64_t Size);
227  void expandMemoryRegions(uint64_t Size);
228
229  std::vector<InputSection *>
230  computeInputSections(const InputSectionDescription *);
231
232  std::vector<InputSection *> createInputSectionList(OutputSection &Cmd);
233
234  std::vector<size_t> getPhdrIndices(OutputSection *Sec);
235
236  MemoryRegion *findMemoryRegion(OutputSection *Sec);
237
238  void switchTo(OutputSection *Sec);
239  uint64_t advance(uint64_t Size, unsigned Align);
240  void output(InputSection *Sec);
241
242  void assignOffsets(OutputSection *Sec);
243
244  // Ctx captures the local AddressState and makes it accessible
245  // deliberately. This is needed as there are some cases where we cannot just
246  // thread the current state through to a lambda function created by the
247  // script parser.
248  // This should remain a plain pointer as its lifetime is smaller than
249  // LinkerScript.
250  AddressState *Ctx = nullptr;
251
252  OutputSection *Aether;
253
254  uint64_t Dot;
255
256public:
257  OutputSection *createOutputSection(StringRef Name, StringRef Location);
258  OutputSection *getOrCreateOutputSection(StringRef Name);
259
260  bool hasPhdrsCommands() { return !PhdrsCommands.empty(); }
261  uint64_t getDot() { return Dot; }
262  void discard(ArrayRef<InputSection *> V);
263
264  ExprValue getSymbolValue(StringRef Name, const Twine &Loc);
265
266  void addOrphanSections();
267  void adjustSectionsBeforeSorting();
268  void adjustSectionsAfterSorting();
269
270  std::vector<PhdrEntry *> createPhdrs();
271  bool needsInterpSection();
272
273  bool shouldKeep(InputSectionBase *S);
274  void assignAddresses();
275  void allocateHeaders(std::vector<PhdrEntry *> &Phdrs);
276  void processSectionCommands();
277  void declareSymbols();
278
279  // Used to handle INSERT AFTER statements.
280  void processInsertCommands();
281
282  // SECTIONS command list.
283  std::vector<BaseCommand *> SectionCommands;
284
285  // PHDRS command list.
286  std::vector<PhdrsCommand> PhdrsCommands;
287
288  bool HasSectionsCommand = false;
289  bool ErrorOnMissingSection = false;
290
291  // List of section patterns specified with KEEP commands. They will
292  // be kept even if they are unused and --gc-sections is specified.
293  std::vector<InputSectionDescription *> KeptSections;
294
295  // A map from memory region name to a memory region descriptor.
296  llvm::MapVector<llvm::StringRef, MemoryRegion *> MemoryRegions;
297
298  // A list of symbols referenced by the script.
299  std::vector<llvm::StringRef> ReferencedSymbols;
300
301  // Used to implement INSERT [AFTER|BEFORE]. Contains commands that need
302  // to be inserted into SECTIONS commands list.
303  llvm::DenseMap<StringRef, std::vector<BaseCommand *>> InsertAfterCommands;
304  llvm::DenseMap<StringRef, std::vector<BaseCommand *>> InsertBeforeCommands;
305};
306
307extern LinkerScript *Script;
308
309} // end namespace elf
310} // end namespace lld
311
312#endif // LLD_ELF_LINKER_SCRIPT_H
313