1//===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_ELF_LINKER_SCRIPT_H
10#define LLD_ELF_LINKER_SCRIPT_H
11
#include "Config.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <utility>
#include <vector>
27
28namespace lld {
29namespace elf {
30
31class Defined;
32class InputSection;
33class InputSectionBase;
34class OutputSection;
35class SectionBase;
36class Symbol;
37class ThunkSection;
38
39// This represents an r-value in the linker script.
40struct ExprValue {
41  ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
42            const Twine &loc)
43      : sec(sec), forceAbsolute(forceAbsolute), val(val), loc(loc.str()) {}
44
45  ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}
46
47  bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
48  uint64_t getValue() const;
49  uint64_t getSecAddr() const;
50  uint64_t getSectionOffset() const;
51
52  // If a value is relative to a section, it has a non-null Sec.
53  SectionBase *sec;
54
55  // True if this expression is enclosed in ABSOLUTE().
56  // This flag affects the return value of getValue().
57  bool forceAbsolute;
58
59  uint64_t val;
60  uint64_t alignment = 1;
61
62  // Original source location. Used for error messages.
63  std::string loc;
64};
65
66// This represents an expression in the linker script.
67// ScriptParser::readExpr reads an expression and returns an Expr.
68// Later, we evaluate the expression by calling the function.
69using Expr = std::function<ExprValue()>;
70
// Tags for the kinds of command handled when implementing the linker script
// SECTIONS command.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
enum SectionsCommandKind {
  // ". = expr" or "<sym> = expr".
  AssignmentKind,
  OutputSectionKind,
  InputSectionKind,
  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr).
  ByteKind
};
79
// Common base of the command structs. `kind` stores the tag that the derived
// structs pass in from their constructors and test in their classof().
struct BaseCommand {
  int kind;

  BaseCommand(int k) : kind(k) {}
};
84
85// This represents ". = <expr>" or "<symbol> = <expr>".
86struct SymbolAssignment : BaseCommand {
87  SymbolAssignment(StringRef name, Expr e, std::string loc)
88      : BaseCommand(AssignmentKind), name(name), expression(e), location(loc) {}
89
90  static bool classof(const BaseCommand *c) {
91    return c->kind == AssignmentKind;
92  }
93
94  // The LHS of an expression. Name is either a symbol name or ".".
95  StringRef name;
96  Defined *sym = nullptr;
97
98  // The RHS of an expression.
99  Expr expression;
100
101  // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
102  bool provide = false;
103  bool hidden = false;
104
105  // Holds file name and line number for error reporting.
106  std::string location;
107
108  // A string representation of this command. We use this for -Map.
109  std::string commandString;
110
111  // Address of this assignment command.
112  unsigned addr;
113
114  // Size of this assignment command. This is usually 0, but if
115  // you move '.' this may be greater than 0.
116  unsigned size;
117};
118
// Output sections in a linker script may carry an extra constraint.
// An output section marked ONLY_IF_RO is created only when its input
// sections are read-only; likewise, one marked ONLY_IF_RW is created only
// when all of its input sections are RW.
enum class ConstraintKind {
  NoConstraint,
  ReadOnly,
  ReadWrite,
};
124
125// This struct is used to represent the location and size of regions of
126// target memory. Instances of the struct are created by parsing the
127// MEMORY command.
128struct MemoryRegion {
129  MemoryRegion(StringRef name, uint64_t origin, uint64_t length, uint32_t flags,
130               uint32_t negFlags)
131      : name(name), origin(origin), length(length), flags(flags),
132        negFlags(negFlags) {}
133
134  std::string name;
135  uint64_t origin;
136  uint64_t length;
137  uint32_t flags;
138  uint32_t negFlags;
139  uint64_t curPos = 0;
140};
141
142// This struct represents one section match pattern in SECTIONS() command.
143// It can optionally have negative match pattern for EXCLUDED_FILE command.
144// Also it may be surrounded with SORT() command, so contains sorting rules.
145struct SectionPattern {
146  SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
147      : excludedFilePat(pat1), sectionPat(pat2),
148        sortOuter(SortSectionPolicy::Default),
149        sortInner(SortSectionPolicy::Default) {}
150
151  StringMatcher excludedFilePat;
152  StringMatcher sectionPat;
153  SortSectionPolicy sortOuter;
154  SortSectionPolicy sortInner;
155};
156
157struct InputSectionDescription : BaseCommand {
158  InputSectionDescription(StringRef filePattern)
159      : BaseCommand(InputSectionKind), filePat(filePattern) {}
160
161  static bool classof(const BaseCommand *c) {
162    return c->kind == InputSectionKind;
163  }
164
165  StringMatcher filePat;
166
167  // Input sections that matches at least one of SectionPatterns
168  // will be associated with this InputSectionDescription.
169  std::vector<SectionPattern> sectionPatterns;
170
171  // Includes InputSections and MergeInputSections. Used temporarily during
172  // assignment of input sections to output sections.
173  std::vector<InputSectionBase *> sectionBases;
174
175  // Used after the finalizeInputSections() pass. MergeInputSections have been
176  // merged into MergeSyntheticSections.
177  std::vector<InputSection *> sections;
178
179  // Temporary record of synthetic ThunkSection instances and the pass that
180  // they were created in. This is used to insert newly created ThunkSections
181  // into Sections at the end of a createThunks() pass.
182  std::vector<std::pair<ThunkSection *, uint32_t>> thunkSections;
183};
184
185// Represents BYTE(), SHORT(), LONG(), or QUAD().
186struct ByteCommand : BaseCommand {
187  ByteCommand(Expr e, unsigned size, std::string commandString)
188      : BaseCommand(ByteKind), commandString(commandString), expression(e),
189        size(size) {}
190
191  static bool classof(const BaseCommand *c) { return c->kind == ByteKind; }
192
193  // Keeps string representing the command. Used for -Map" is perhaps better.
194  std::string commandString;
195
196  Expr expression;
197
198  // This is just an offset of this assignment command in the output section.
199  unsigned offset;
200
201  // Size of this data command.
202  unsigned size;
203};
204
205struct PhdrsCommand {
206  StringRef name;
207  unsigned type = llvm::ELF::PT_NULL;
208  bool hasFilehdr = false;
209  bool hasPhdrs = false;
210  llvm::Optional<unsigned> flags;
211  Expr lmaExpr = nullptr;
212};
213
214class LinkerScript final {
215  // Temporary state used in processSectionCommands() and assignAddresses()
216  // that must be reinitialized for each call to the above functions, and must
217  // not be used outside of the scope of a call to the above functions.
218  struct AddressState {
219    AddressState();
220    uint64_t threadBssOffset = 0;
221    OutputSection *outSec = nullptr;
222    MemoryRegion *memRegion = nullptr;
223    MemoryRegion *lmaRegion = nullptr;
224    uint64_t lmaOffset = 0;
225  };
226
227  llvm::DenseMap<StringRef, OutputSection *> nameToOutputSection;
228
229  void addSymbol(SymbolAssignment *cmd);
230  void assignSymbol(SymbolAssignment *cmd, bool inSec);
231  void setDot(Expr e, const Twine &loc, bool inSec);
232  void expandOutputSection(uint64_t size);
233  void expandMemoryRegions(uint64_t size);
234
235  std::vector<InputSectionBase *>
236  computeInputSections(const InputSectionDescription *);
237
238  std::vector<InputSectionBase *> createInputSectionList(OutputSection &cmd);
239
240  std::vector<size_t> getPhdrIndices(OutputSection *sec);
241
242  MemoryRegion *findMemoryRegion(OutputSection *sec);
243
244  void switchTo(OutputSection *sec);
245  uint64_t advance(uint64_t size, unsigned align);
246  void output(InputSection *sec);
247
248  void assignOffsets(OutputSection *sec);
249
250  // Ctx captures the local AddressState and makes it accessible
251  // deliberately. This is needed as there are some cases where we cannot just
252  // thread the current state through to a lambda function created by the
253  // script parser.
254  // This should remain a plain pointer as its lifetime is smaller than
255  // LinkerScript.
256  AddressState *ctx = nullptr;
257
258  OutputSection *aether;
259
260  uint64_t dot;
261
262public:
263  OutputSection *createOutputSection(StringRef name, StringRef location);
264  OutputSection *getOrCreateOutputSection(StringRef name);
265
266  bool hasPhdrsCommands() { return !phdrsCommands.empty(); }
267  uint64_t getDot() { return dot; }
268  void discard(InputSectionBase *s);
269
270  ExprValue getSymbolValue(StringRef name, const Twine &loc);
271
272  void addOrphanSections();
273  void adjustSectionsBeforeSorting();
274  void adjustSectionsAfterSorting();
275
276  std::vector<PhdrEntry *> createPhdrs();
277  bool needsInterpSection();
278
279  bool shouldKeep(InputSectionBase *s);
280  const Defined *assignAddresses();
281  void allocateHeaders(std::vector<PhdrEntry *> &phdrs);
282  void processSectionCommands();
283  void processSymbolAssignments();
284  void declareSymbols();
285
286  // Used to handle INSERT AFTER statements.
287  void processInsertCommands();
288
289  // SECTIONS command list.
290  std::vector<BaseCommand *> sectionCommands;
291
292  // PHDRS command list.
293  std::vector<PhdrsCommand> phdrsCommands;
294
295  bool hasSectionsCommand = false;
296  bool errorOnMissingSection = false;
297
298  // List of section patterns specified with KEEP commands. They will
299  // be kept even if they are unused and --gc-sections is specified.
300  std::vector<InputSectionDescription *> keptSections;
301
302  // A map from memory region name to a memory region descriptor.
303  llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions;
304
305  // A list of symbols referenced by the script.
306  std::vector<llvm::StringRef> referencedSymbols;
307
308  // Used to implement INSERT [AFTER|BEFORE]. Contains commands that need
309  // to be inserted into SECTIONS commands list.
310  llvm::DenseMap<StringRef, std::vector<BaseCommand *>> insertAfterCommands;
311  llvm::DenseMap<StringRef, std::vector<BaseCommand *>> insertBeforeCommands;
312};
313
314extern LinkerScript *script;
315
316} // end namespace elf
317} // end namespace lld
318
319#endif // LLD_ELF_LINKER_SCRIPT_H
320