1//===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_ELF_LINKER_SCRIPT_H
10#define LLD_ELF_LINKER_SCRIPT_H
11
#include "Config.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <utility>
#include <vector>
27
28namespace lld {
29namespace elf {
30
31class Defined;
32class InputSection;
33class InputSectionBase;
34class OutputSection;
35class SectionBase;
36class Symbol;
37class ThunkSection;
38
39// This represents an r-value in the linker script.
40struct ExprValue {
41  ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
42            const Twine &loc)
43      : sec(sec), forceAbsolute(forceAbsolute), val(val), loc(loc.str()) {}
44
45  ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}
46
47  bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
48  uint64_t getValue() const;
49  uint64_t getSecAddr() const;
50  uint64_t getSectionOffset() const;
51
52  // If a value is relative to a section, it has a non-null Sec.
53  SectionBase *sec;
54
55  // True if this expression is enclosed in ABSOLUTE().
56  // This flag affects the return value of getValue().
57  bool forceAbsolute;
58
59  uint64_t val;
60  uint64_t alignment = 1;
61
62  // The original st_type if the expression represents a symbol. Any operation
63  // resets type to STT_NOTYPE.
64  uint8_t type = llvm::ELF::STT_NOTYPE;
65
66  // Original source location. Used for error messages.
67  std::string loc;
68};
69
// This represents an expression in the linker script.
// ScriptParser::readExpr reads an expression and returns an Expr.
// Later, we evaluate the expression by calling the function, which yields an
// ExprValue. Being a std::function, an Expr may also be empty (see
// PhdrsCommand::lmaExpr, which is initialized from nullptr).
using Expr = std::function<ExprValue()>;
74
// Discriminator values for the commands that make up a SECTIONS command.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
// The value is stored in BaseCommand::kind and checked by each command's
// classof().
enum SectionsCommandKind {
  // . = expr or <sym> = expr
  AssignmentKind,
  // Not used by any type in this header; presumably the tag of OutputSection,
  // which is defined elsewhere.
  OutputSectionKind,
  // Tag of InputSectionDescription.
  InputSectionKind,
  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
  ByteKind
};
83
// Common base of the command types stored in LinkerScript::sectionCommands.
// `kind` carries a SectionsCommandKind value that the derived types' classof()
// functions compare against.
struct BaseCommand {
  int kind;

  BaseCommand(int k) : kind{k} {}
};
88
89// This represents ". = <expr>" or "<symbol> = <expr>".
90struct SymbolAssignment : BaseCommand {
91  SymbolAssignment(StringRef name, Expr e, std::string loc)
92      : BaseCommand(AssignmentKind), name(name), expression(e), location(loc) {}
93
94  static bool classof(const BaseCommand *c) {
95    return c->kind == AssignmentKind;
96  }
97
98  // The LHS of an expression. Name is either a symbol name or ".".
99  StringRef name;
100  Defined *sym = nullptr;
101
102  // The RHS of an expression.
103  Expr expression;
104
105  // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
106  bool provide = false;
107  bool hidden = false;
108
109  // Holds file name and line number for error reporting.
110  std::string location;
111
112  // A string representation of this command. We use this for -Map.
113  std::string commandString;
114
115  // Address of this assignment command.
116  uint64_t addr;
117
118  // Size of this assignment command. This is usually 0, but if
119  // you move '.' this may be greater than 0.
120  uint64_t size;
121};
122
// Output sections in a linker script may carry an extra constraint.
// An output section marked ONLY_IF_RO is created only if its input sections
// are read-only; likewise, one marked ONLY_IF_RW is created if all input
// sections are RW.
enum class ConstraintKind {
  NoConstraint,
  ReadOnly,
  ReadWrite,
};
128
129// This struct is used to represent the location and size of regions of
130// target memory. Instances of the struct are created by parsing the
131// MEMORY command.
132struct MemoryRegion {
133  MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags,
134               uint32_t negFlags)
135      : name(std::string(name)), origin(origin), length(length), flags(flags),
136        negFlags(negFlags) {}
137
138  std::string name;
139  Expr origin;
140  Expr length;
141  uint32_t flags;
142  uint32_t negFlags;
143  uint64_t curPos = 0;
144};
145
146// This struct represents one section match pattern in SECTIONS() command.
147// It can optionally have negative match pattern for EXCLUDED_FILE command.
148// Also it may be surrounded with SORT() command, so contains sorting rules.
149struct SectionPattern {
150  SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
151      : excludedFilePat(pat1), sectionPat(pat2),
152        sortOuter(SortSectionPolicy::Default),
153        sortInner(SortSectionPolicy::Default) {}
154
155  StringMatcher excludedFilePat;
156  StringMatcher sectionPat;
157  SortSectionPolicy sortOuter;
158  SortSectionPolicy sortInner;
159};
160
161struct InputSectionDescription : BaseCommand {
162  InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
163                          uint64_t withoutFlags = 0)
164      : BaseCommand(InputSectionKind), filePat(filePattern),
165        withFlags(withFlags), withoutFlags(withoutFlags) {}
166
167  static bool classof(const BaseCommand *c) {
168    return c->kind == InputSectionKind;
169  }
170
171  SingleStringMatcher filePat;
172
173  // Input sections that matches at least one of SectionPatterns
174  // will be associated with this InputSectionDescription.
175  std::vector<SectionPattern> sectionPatterns;
176
177  // Includes InputSections and MergeInputSections. Used temporarily during
178  // assignment of input sections to output sections.
179  std::vector<InputSectionBase *> sectionBases;
180
181  // Used after the finalizeInputSections() pass. MergeInputSections have been
182  // merged into MergeSyntheticSections.
183  std::vector<InputSection *> sections;
184
185  // Temporary record of synthetic ThunkSection instances and the pass that
186  // they were created in. This is used to insert newly created ThunkSections
187  // into Sections at the end of a createThunks() pass.
188  std::vector<std::pair<ThunkSection *, uint32_t>> thunkSections;
189
190  // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
191  uint64_t withFlags;
192  uint64_t withoutFlags;
193};
194
195// Represents BYTE(), SHORT(), LONG(), or QUAD().
196struct ByteCommand : BaseCommand {
197  ByteCommand(Expr e, unsigned size, std::string commandString)
198      : BaseCommand(ByteKind), commandString(commandString), expression(e),
199        size(size) {}
200
201  static bool classof(const BaseCommand *c) { return c->kind == ByteKind; }
202
203  // Keeps string representing the command. Used for -Map" is perhaps better.
204  std::string commandString;
205
206  Expr expression;
207
208  // This is just an offset of this assignment command in the output section.
209  unsigned offset;
210
211  // Size of this data command.
212  unsigned size;
213};
214
// One entry of LinkerScript::insertCommands, which is used to implement
// INSERT [AFTER|BEFORE]. The members have no default initializers, so a
// default-initialized InsertCommand leaves `os` and `isAfter` indeterminate.
struct InsertCommand {
  // An output section that needs to be reordered.
  OutputSection *os;
  // Presumably true for INSERT AFTER and false for INSERT BEFORE — confirm
  // against the parser.
  bool isAfter;
  // Presumably the name of the section to insert relative to — confirm
  // against the parser.
  StringRef where;
};
220
// One entry of the PHDRS command list (LinkerScript::phdrsCommands).
struct PhdrsCommand {
  StringRef name;
  // Defaults to PT_NULL.
  unsigned type = llvm::ELF::PT_NULL;
  // Both default to false. Presumably set by the FILEHDR and PHDRS keywords
  // respectively — confirm against the parser.
  bool hasFilehdr = false;
  bool hasPhdrs = false;
  // Has no value unless flags are set explicitly.
  llvm::Optional<unsigned> flags;
  // Empty (null) by default.
  Expr lmaExpr = nullptr;
};
229
230class LinkerScript final {
231  // Temporary state used in processSectionCommands() and assignAddresses()
232  // that must be reinitialized for each call to the above functions, and must
233  // not be used outside of the scope of a call to the above functions.
234  struct AddressState {
235    AddressState();
236    uint64_t threadBssOffset = 0;
237    OutputSection *outSec = nullptr;
238    MemoryRegion *memRegion = nullptr;
239    MemoryRegion *lmaRegion = nullptr;
240    uint64_t lmaOffset = 0;
241  };
242
243  llvm::DenseMap<StringRef, OutputSection *> nameToOutputSection;
244
245  void addSymbol(SymbolAssignment *cmd);
246  void assignSymbol(SymbolAssignment *cmd, bool inSec);
247  void setDot(Expr e, const Twine &loc, bool inSec);
248  void expandOutputSection(uint64_t size);
249  void expandMemoryRegions(uint64_t size);
250
251  std::vector<InputSectionBase *>
252  computeInputSections(const InputSectionDescription *,
253                       ArrayRef<InputSectionBase *>);
254
255  std::vector<InputSectionBase *> createInputSectionList(OutputSection &cmd);
256
257  void discardSynthetic(OutputSection &);
258
259  std::vector<size_t> getPhdrIndices(OutputSection *sec);
260
261  MemoryRegion *findMemoryRegion(OutputSection *sec);
262
263  void switchTo(OutputSection *sec);
264  uint64_t advance(uint64_t size, unsigned align);
265  void output(InputSection *sec);
266
267  void assignOffsets(OutputSection *sec);
268
269  // Ctx captures the local AddressState and makes it accessible
270  // deliberately. This is needed as there are some cases where we cannot just
271  // thread the current state through to a lambda function created by the
272  // script parser.
273  // This should remain a plain pointer as its lifetime is smaller than
274  // LinkerScript.
275  AddressState *ctx = nullptr;
276
277  OutputSection *aether;
278
279  uint64_t dot;
280
281public:
282  OutputSection *createOutputSection(StringRef name, StringRef location);
283  OutputSection *getOrCreateOutputSection(StringRef name);
284
285  bool hasPhdrsCommands() { return !phdrsCommands.empty(); }
286  uint64_t getDot() { return dot; }
287  void discard(InputSectionBase *s);
288
289  ExprValue getSymbolValue(StringRef name, const Twine &loc);
290
291  void addOrphanSections();
292  void diagnoseOrphanHandling() const;
293  void adjustSectionsBeforeSorting();
294  void adjustSectionsAfterSorting();
295
296  std::vector<PhdrEntry *> createPhdrs();
297  bool needsInterpSection();
298
299  bool shouldKeep(InputSectionBase *s);
300  const Defined *assignAddresses();
301  void allocateHeaders(std::vector<PhdrEntry *> &phdrs);
302  void processSectionCommands();
303  void processSymbolAssignments();
304  void declareSymbols();
305
306  // Used to handle INSERT AFTER statements.
307  void processInsertCommands();
308
309  // SECTIONS command list.
310  std::vector<BaseCommand *> sectionCommands;
311
312  // PHDRS command list.
313  std::vector<PhdrsCommand> phdrsCommands;
314
315  bool hasSectionsCommand = false;
316  bool errorOnMissingSection = false;
317
318  // List of section patterns specified with KEEP commands. They will
319  // be kept even if they are unused and --gc-sections is specified.
320  std::vector<InputSectionDescription *> keptSections;
321
322  // A map from memory region name to a memory region descriptor.
323  llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions;
324
325  // A list of symbols referenced by the script.
326  std::vector<llvm::StringRef> referencedSymbols;
327
328  // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need
329  // to be reordered.
330  std::vector<InsertCommand> insertCommands;
331
332  // Sections that will be warned/errored by --orphan-handling.
333  std::vector<const InputSectionBase *> orphanSections;
334};
335
// Declaration of the global LinkerScript pointer. This is only a declaration;
// the definition is not in this header.
extern LinkerScript *script;
337
338} // end namespace elf
339} // end namespace lld
340
341#endif // LLD_ELF_LINKER_SCRIPT_H
342