LinkerScript.h revision 344779
1//===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2//
3//                             The LLVM Linker
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef LLD_ELF_LINKER_SCRIPT_H
11#define LLD_ELF_LINKER_SCRIPT_H
12
#include "Config.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <utility>
#include <vector>
28
29namespace lld {
30namespace elf {
31
// Forward declarations. This header uses these types only through pointers
// and references, so their full definitions are not needed here.
class Defined;
class InputSection;
class InputSectionBase;
class OutputSection;
class SectionBase;
class Symbol;
class ThunkSection;
40
41// This represents an r-value in the linker script.
42struct ExprValue {
43  ExprValue(SectionBase *Sec, bool ForceAbsolute, uint64_t Val,
44            const Twine &Loc)
45      : Sec(Sec), ForceAbsolute(ForceAbsolute), Val(Val), Loc(Loc.str()) {}
46
47  ExprValue(uint64_t Val) : ExprValue(nullptr, false, Val, "") {}
48
49  bool isAbsolute() const { return ForceAbsolute || Sec == nullptr; }
50  uint64_t getValue() const;
51  uint64_t getSecAddr() const;
52  uint64_t getSectionOffset() const;
53
54  // If a value is relative to a section, it has a non-null Sec.
55  SectionBase *Sec;
56
57  // True if this expression is enclosed in ABSOLUTE().
58  // This flag affects the return value of getValue().
59  bool ForceAbsolute;
60
61  uint64_t Val;
62  uint64_t Alignment = 1;
63
64  // Original source location. Used for error messages.
65  std::string Loc;
66};
67
68// This represents an expression in the linker script.
69// ScriptParser::readExpr reads an expression and returns an Expr.
70// Later, we evaluate the expression by calling the function.
71typedef std::function<ExprValue()> Expr;
72
// Tags for the kinds of command that implement the linker script SECTIONS
// command.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
enum SectionsCommandKind {
  // ". = expr" or "<sym> = expr"
  AssignmentKind,
  OutputSectionKind,
  InputSectionKind,
  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
  ByteKind,
};
81
// Common base of the SECTIONS command structs. Kind stores the tag passed in
// by the derived struct's constructor; the classof() helpers of the derived
// structs compare against it.
struct BaseCommand {
  BaseCommand(int CommandKind) : Kind(CommandKind) {}
  int Kind;
};
86
87// This represents ". = <expr>" or "<symbol> = <expr>".
88struct SymbolAssignment : BaseCommand {
89  SymbolAssignment(StringRef Name, Expr E, std::string Loc)
90      : BaseCommand(AssignmentKind), Name(Name), Expression(E), Location(Loc) {}
91
92  static bool classof(const BaseCommand *C) {
93    return C->Kind == AssignmentKind;
94  }
95
96  // The LHS of an expression. Name is either a symbol name or ".".
97  StringRef Name;
98  Defined *Sym = nullptr;
99
100  // The RHS of an expression.
101  Expr Expression;
102
103  // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
104  bool Provide = false;
105  bool Hidden = false;
106
107  // Holds file name and line number for error reporting.
108  std::string Location;
109
110  // A string representation of this command. We use this for -Map.
111  std::string CommandString;
112
113  // Address of this assignment command.
114  unsigned Addr;
115
116  // Size of this assignment command. This is usually 0, but if
117  // you move '.' this may be greater than 0.
118  unsigned Size;
119};
120
// Linker scripts allow additional constraints to be put on output sections.
// An output section marked ONLY_IF_RO is created only if its input sections
// are read-only. Likewise, an output section marked ONLY_IF_RW is created
// only if all of its input sections are RW.
enum class ConstraintKind {
  NoConstraint,
  ReadOnly,
  ReadWrite,
};
126
127// This struct is used to represent the location and size of regions of
128// target memory. Instances of the struct are created by parsing the
129// MEMORY command.
130struct MemoryRegion {
131  MemoryRegion(StringRef Name, uint64_t Origin, uint64_t Length, uint32_t Flags,
132               uint32_t NegFlags)
133      : Name(Name), Origin(Origin), Length(Length), Flags(Flags),
134        NegFlags(NegFlags) {}
135
136  std::string Name;
137  uint64_t Origin;
138  uint64_t Length;
139  uint32_t Flags;
140  uint32_t NegFlags;
141  uint64_t CurPos = 0;
142};
143
144// This struct represents one section match pattern in SECTIONS() command.
145// It can optionally have negative match pattern for EXCLUDED_FILE command.
146// Also it may be surrounded with SORT() command, so contains sorting rules.
147struct SectionPattern {
148  SectionPattern(StringMatcher &&Pat1, StringMatcher &&Pat2)
149      : ExcludedFilePat(Pat1), SectionPat(Pat2),
150        SortOuter(SortSectionPolicy::Default),
151        SortInner(SortSectionPolicy::Default) {}
152
153  StringMatcher ExcludedFilePat;
154  StringMatcher SectionPat;
155  SortSectionPolicy SortOuter;
156  SortSectionPolicy SortInner;
157};
158
159struct InputSectionDescription : BaseCommand {
160  InputSectionDescription(StringRef FilePattern)
161      : BaseCommand(InputSectionKind), FilePat(FilePattern) {}
162
163  static bool classof(const BaseCommand *C) {
164    return C->Kind == InputSectionKind;
165  }
166
167  StringMatcher FilePat;
168
169  // Input sections that matches at least one of SectionPatterns
170  // will be associated with this InputSectionDescription.
171  std::vector<SectionPattern> SectionPatterns;
172
173  std::vector<InputSection *> Sections;
174
175  // Temporary record of synthetic ThunkSection instances and the pass that
176  // they were created in. This is used to insert newly created ThunkSections
177  // into Sections at the end of a createThunks() pass.
178  std::vector<std::pair<ThunkSection *, uint32_t>> ThunkSections;
179};
180
181// Represents BYTE(), SHORT(), LONG(), or QUAD().
182struct ByteCommand : BaseCommand {
183  ByteCommand(Expr E, unsigned Size, std::string CommandString)
184      : BaseCommand(ByteKind), CommandString(CommandString), Expression(E),
185        Size(Size) {}
186
187  static bool classof(const BaseCommand *C) { return C->Kind == ByteKind; }
188
189  // Keeps string representing the command. Used for -Map" is perhaps better.
190  std::string CommandString;
191
192  Expr Expression;
193
194  // This is just an offset of this assignment command in the output section.
195  unsigned Offset;
196
197  // Size of this data command.
198  unsigned Size;
199};
200
201struct PhdrsCommand {
202  StringRef Name;
203  unsigned Type = llvm::ELF::PT_NULL;
204  bool HasFilehdr = false;
205  bool HasPhdrs = false;
206  llvm::Optional<unsigned> Flags;
207  Expr LMAExpr = nullptr;
208};
209
210class LinkerScript final {
211  // Temporary state used in processSectionCommands() and assignAddresses()
212  // that must be reinitialized for each call to the above functions, and must
213  // not be used outside of the scope of a call to the above functions.
214  struct AddressState {
215    AddressState();
216    uint64_t ThreadBssOffset = 0;
217    OutputSection *OutSec = nullptr;
218    MemoryRegion *MemRegion = nullptr;
219    MemoryRegion *LMARegion = nullptr;
220    uint64_t LMAOffset = 0;
221  };
222
223  llvm::DenseMap<StringRef, OutputSection *> NameToOutputSection;
224
225  void addSymbol(SymbolAssignment *Cmd);
226  void assignSymbol(SymbolAssignment *Cmd, bool InSec);
227  void setDot(Expr E, const Twine &Loc, bool InSec);
228  void expandOutputSection(uint64_t Size);
229  void expandMemoryRegions(uint64_t Size);
230
231  std::vector<InputSection *>
232  computeInputSections(const InputSectionDescription *);
233
234  std::vector<InputSection *> createInputSectionList(OutputSection &Cmd);
235
236  std::vector<size_t> getPhdrIndices(OutputSection *Sec);
237
238  MemoryRegion *findMemoryRegion(OutputSection *Sec);
239
240  void switchTo(OutputSection *Sec);
241  uint64_t advance(uint64_t Size, unsigned Align);
242  void output(InputSection *Sec);
243
244  void assignOffsets(OutputSection *Sec);
245
246  // Ctx captures the local AddressState and makes it accessible
247  // deliberately. This is needed as there are some cases where we cannot just
248  // thread the current state through to a lambda function created by the
249  // script parser.
250  // This should remain a plain pointer as its lifetime is smaller than
251  // LinkerScript.
252  AddressState *Ctx = nullptr;
253
254  OutputSection *Aether;
255
256  uint64_t Dot;
257
258public:
259  OutputSection *createOutputSection(StringRef Name, StringRef Location);
260  OutputSection *getOrCreateOutputSection(StringRef Name);
261
262  bool hasPhdrsCommands() { return !PhdrsCommands.empty(); }
263  uint64_t getDot() { return Dot; }
264  void discard(ArrayRef<InputSection *> V);
265
266  ExprValue getSymbolValue(StringRef Name, const Twine &Loc);
267
268  void addOrphanSections();
269  void adjustSectionsBeforeSorting();
270  void adjustSectionsAfterSorting();
271
272  std::vector<PhdrEntry *> createPhdrs();
273  bool needsInterpSection();
274
275  bool shouldKeep(InputSectionBase *S);
276  void assignAddresses();
277  void allocateHeaders(std::vector<PhdrEntry *> &Phdrs);
278  void processSectionCommands();
279  void declareSymbols();
280
281  // Used to handle INSERT AFTER statements.
282  void processInsertCommands();
283
284  // SECTIONS command list.
285  std::vector<BaseCommand *> SectionCommands;
286
287  // PHDRS command list.
288  std::vector<PhdrsCommand> PhdrsCommands;
289
290  bool HasSectionsCommand = false;
291  bool ErrorOnMissingSection = false;
292
293  // List of section patterns specified with KEEP commands. They will
294  // be kept even if they are unused and --gc-sections is specified.
295  std::vector<InputSectionDescription *> KeptSections;
296
297  // A map from memory region name to a memory region descriptor.
298  llvm::MapVector<llvm::StringRef, MemoryRegion *> MemoryRegions;
299
300  // A list of symbols referenced by the script.
301  std::vector<llvm::StringRef> ReferencedSymbols;
302
303  // Used to implement INSERT [AFTER|BEFORE]. Contains commands that need
304  // to be inserted into SECTIONS commands list.
305  llvm::DenseMap<StringRef, std::vector<BaseCommand *>> InsertAfterCommands;
306  llvm::DenseMap<StringRef, std::vector<BaseCommand *>> InsertBeforeCommands;
307};
308
309extern LinkerScript *Script;
310
311} // end namespace elf
312} // end namespace lld
313
314#endif // LLD_ELF_LINKER_SCRIPT_H
315