1//===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_ELF_LINKER_SCRIPT_H
10#define LLD_ELF_LINKER_SCRIPT_H
11
#include "Config.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <utility>
25
26namespace lld::elf {
27
28class Defined;
29class InputFile;
30class InputSection;
31class InputSectionBase;
32class OutputSection;
33class SectionBase;
34class ThunkSection;
35struct OutputDesc;
36
37// This represents an r-value in the linker script.
38struct ExprValue {
39  ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
40            const Twine &loc)
41      : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {}
42
43  ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}
44
45  bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
46  uint64_t getValue() const;
47  uint64_t getSecAddr() const;
48  uint64_t getSectionOffset() const;
49
50  // If a value is relative to a section, it has a non-null Sec.
51  SectionBase *sec;
52
53  uint64_t val;
54  uint64_t alignment = 1;
55
56  // The original st_type if the expression represents a symbol. Any operation
57  // resets type to STT_NOTYPE.
58  uint8_t type = llvm::ELF::STT_NOTYPE;
59
60  // True if this expression is enclosed in ABSOLUTE().
61  // This flag affects the return value of getValue().
62  bool forceAbsolute;
63
64  // Original source location. Used for error messages.
65  std::string loc;
66};
67
68// This represents an expression in the linker script.
69// ScriptParser::readExpr reads an expression and returns an Expr.
70// Later, we evaluate the expression by calling the function.
71using Expr = std::function<ExprValue()>;
72
// Discriminator for the commands that make up a linker script SECTIONS
// command. The value is stored in SectionCommand::kind.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
enum SectionsCommandKind {
  // ". = expr" or "<sym> = expr" (see SymbolAssignment).
  AssignmentKind,
  // Not used by any type in this header; presumably tags output section
  // descriptions (OutputDesc) -- confirm against its definition.
  OutputSectionKind,
  // An input section description (see InputSectionDescription).
  InputSectionKind,
  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr) (see ByteCommand).
  ByteKind
};
81
// Common base of the command types in this header. `kind` is the tag that the
// derived types compare against in their classof() implementations.
struct SectionCommand {
  SectionCommand(int k) : kind{k} {}

  // One of the SectionsCommandKind values, stored as a plain int.
  int kind;
};
86
87// This represents ". = <expr>" or "<symbol> = <expr>".
88struct SymbolAssignment : SectionCommand {
89  SymbolAssignment(StringRef name, Expr e, std::string loc)
90      : SectionCommand(AssignmentKind), name(name), expression(e),
91        location(loc) {}
92
93  static bool classof(const SectionCommand *c) {
94    return c->kind == AssignmentKind;
95  }
96
97  // The LHS of an expression. Name is either a symbol name or ".".
98  StringRef name;
99  Defined *sym = nullptr;
100
101  // The RHS of an expression.
102  Expr expression;
103
104  // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
105  bool provide = false;
106  bool hidden = false;
107
108  // Holds file name and line number for error reporting.
109  std::string location;
110
111  // A string representation of this command. We use this for -Map.
112  std::string commandString;
113
114  // Address of this assignment command.
115  uint64_t addr;
116
117  // Size of this assignment command. This is usually 0, but if
118  // you move '.' this may be greater than 0.
119  uint64_t size;
120};
121
// Optional constraint a linker script can attach to an output section.
// An output section marked ONLY_IF_RO is created only if its input sections
// are read-only; one marked ONLY_IF_RW is created only if all of its input
// sections are RW.
enum class ConstraintKind {
  NoConstraint,
  ReadOnly,
  ReadWrite,
};
127
128// This struct is used to represent the location and size of regions of
129// target memory. Instances of the struct are created by parsing the
130// MEMORY command.
131struct MemoryRegion {
132  MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags,
133               uint32_t invFlags, uint32_t negFlags, uint32_t negInvFlags)
134      : name(std::string(name)), origin(origin), length(length), flags(flags),
135        invFlags(invFlags), negFlags(negFlags), negInvFlags(negInvFlags) {}
136
137  std::string name;
138  Expr origin;
139  Expr length;
140  // A section can be assigned to the region if any of these ELF section flags
141  // are set...
142  uint32_t flags;
143  // ... or any of these flags are not set.
144  // For example, the memory region attribute "r" maps to SHF_WRITE.
145  uint32_t invFlags;
146  // A section cannot be assigned to the region if any of these ELF section
147  // flags are set...
148  uint32_t negFlags;
149  // ... or any of these flags are not set.
150  // For example, the memory region attribute "!r" maps to SHF_WRITE.
151  uint32_t negInvFlags;
152  uint64_t curPos = 0;
153
154  bool compatibleWith(uint32_t secFlags) const {
155    if ((secFlags & negFlags) || (~secFlags & negInvFlags))
156      return false;
157    return (secFlags & flags) || (~secFlags & invFlags);
158  }
159};
160
161// This struct represents one section match pattern in SECTIONS() command.
162// It can optionally have negative match pattern for EXCLUDED_FILE command.
163// Also it may be surrounded with SORT() command, so contains sorting rules.
164class SectionPattern {
165  StringMatcher excludedFilePat;
166
167  // Cache of the most recent input argument and result of excludesFile().
168  mutable std::optional<std::pair<const InputFile *, bool>> excludesFileCache;
169
170public:
171  SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
172      : excludedFilePat(pat1), sectionPat(pat2),
173        sortOuter(SortSectionPolicy::Default),
174        sortInner(SortSectionPolicy::Default) {}
175
176  bool excludesFile(const InputFile *file) const;
177
178  StringMatcher sectionPat;
179  SortSectionPolicy sortOuter;
180  SortSectionPolicy sortInner;
181};
182
183class InputSectionDescription : public SectionCommand {
184  SingleStringMatcher filePat;
185
186  // Cache of the most recent input argument and result of matchesFile().
187  mutable std::optional<std::pair<const InputFile *, bool>> matchesFileCache;
188
189public:
190  InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
191                          uint64_t withoutFlags = 0)
192      : SectionCommand(InputSectionKind), filePat(filePattern),
193        withFlags(withFlags), withoutFlags(withoutFlags) {}
194
195  static bool classof(const SectionCommand *c) {
196    return c->kind == InputSectionKind;
197  }
198
199  bool matchesFile(const InputFile *file) const;
200
201  // Input sections that matches at least one of SectionPatterns
202  // will be associated with this InputSectionDescription.
203  SmallVector<SectionPattern, 0> sectionPatterns;
204
205  // Includes InputSections and MergeInputSections. Used temporarily during
206  // assignment of input sections to output sections.
207  SmallVector<InputSectionBase *, 0> sectionBases;
208
209  // Used after the finalizeInputSections() pass. MergeInputSections have been
210  // merged into MergeSyntheticSections.
211  SmallVector<InputSection *, 0> sections;
212
213  // Temporary record of synthetic ThunkSection instances and the pass that
214  // they were created in. This is used to insert newly created ThunkSections
215  // into Sections at the end of a createThunks() pass.
216  SmallVector<std::pair<ThunkSection *, uint32_t>, 0> thunkSections;
217
218  // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
219  uint64_t withFlags;
220  uint64_t withoutFlags;
221};
222
223// Represents BYTE(), SHORT(), LONG(), or QUAD().
224struct ByteCommand : SectionCommand {
225  ByteCommand(Expr e, unsigned size, std::string commandString)
226      : SectionCommand(ByteKind), commandString(commandString), expression(e),
227        size(size) {}
228
229  static bool classof(const SectionCommand *c) { return c->kind == ByteKind; }
230
231  // Keeps string representing the command. Used for -Map" is perhaps better.
232  std::string commandString;
233
234  Expr expression;
235
236  // This is just an offset of this assignment command in the output section.
237  unsigned offset;
238
239  // Size of this data command.
240  unsigned size;
241};
242
243struct InsertCommand {
244  SmallVector<StringRef, 0> names;
245  bool isAfter;
246  StringRef where;
247};
248
249struct PhdrsCommand {
250  StringRef name;
251  unsigned type = llvm::ELF::PT_NULL;
252  bool hasFilehdr = false;
253  bool hasPhdrs = false;
254  std::optional<unsigned> flags;
255  Expr lmaExpr = nullptr;
256};
257
258class LinkerScript final {
259  // Temporary state used in processSectionCommands() and assignAddresses()
260  // that must be reinitialized for each call to the above functions, and must
261  // not be used outside of the scope of a call to the above functions.
262  struct AddressState {
263    AddressState();
264    OutputSection *outSec = nullptr;
265    MemoryRegion *memRegion = nullptr;
266    MemoryRegion *lmaRegion = nullptr;
267    uint64_t lmaOffset = 0;
268    uint64_t tbssAddr = 0;
269  };
270
271  llvm::DenseMap<llvm::CachedHashStringRef, OutputDesc *> nameToOutputSection;
272
273  void addSymbol(SymbolAssignment *cmd);
274  void assignSymbol(SymbolAssignment *cmd, bool inSec);
275  void setDot(Expr e, const Twine &loc, bool inSec);
276  void expandOutputSection(uint64_t size);
277  void expandMemoryRegions(uint64_t size);
278
279  SmallVector<InputSectionBase *, 0>
280  computeInputSections(const InputSectionDescription *,
281                       ArrayRef<InputSectionBase *>);
282
283  SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd);
284
285  void discardSynthetic(OutputSection &);
286
287  SmallVector<size_t, 0> getPhdrIndices(OutputSection *sec);
288
289  std::pair<MemoryRegion *, MemoryRegion *>
290  findMemoryRegion(OutputSection *sec, MemoryRegion *hint);
291
292  void assignOffsets(OutputSection *sec);
293
294  // This captures the local AddressState and makes it accessible
295  // deliberately. This is needed as there are some cases where we cannot just
296  // thread the current state through to a lambda function created by the
297  // script parser.
298  // This should remain a plain pointer as its lifetime is smaller than
299  // LinkerScript.
300  AddressState *state = nullptr;
301
302  OutputSection *aether;
303
304  uint64_t dot;
305
306public:
307  OutputDesc *createOutputSection(StringRef name, StringRef location);
308  OutputDesc *getOrCreateOutputSection(StringRef name);
309
310  bool hasPhdrsCommands() { return !phdrsCommands.empty(); }
311  uint64_t getDot() { return dot; }
312  void discard(InputSectionBase &s);
313
314  ExprValue getSymbolValue(StringRef name, const Twine &loc);
315
316  void addOrphanSections();
317  void diagnoseOrphanHandling() const;
318  void adjustOutputSections();
319  void adjustSectionsAfterSorting();
320
321  SmallVector<PhdrEntry *, 0> createPhdrs();
322  bool needsInterpSection();
323
324  bool shouldKeep(InputSectionBase *s);
325  const Defined *assignAddresses();
326  void allocateHeaders(SmallVector<PhdrEntry *, 0> &phdrs);
327  void processSectionCommands();
328  void processSymbolAssignments();
329  void declareSymbols();
330
331  bool isDiscarded(const OutputSection *sec) const;
332
333  // Used to handle INSERT AFTER statements.
334  void processInsertCommands();
335
336  // SECTIONS command list.
337  SmallVector<SectionCommand *, 0> sectionCommands;
338
339  // PHDRS command list.
340  SmallVector<PhdrsCommand, 0> phdrsCommands;
341
342  bool hasSectionsCommand = false;
343  bool errorOnMissingSection = false;
344
345  // List of section patterns specified with KEEP commands. They will
346  // be kept even if they are unused and --gc-sections is specified.
347  SmallVector<InputSectionDescription *, 0> keptSections;
348
349  // A map from memory region name to a memory region descriptor.
350  llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions;
351
352  // A list of symbols referenced by the script.
353  SmallVector<llvm::StringRef, 0> referencedSymbols;
354
355  // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need
356  // to be reordered.
357  SmallVector<InsertCommand, 0> insertCommands;
358
359  // OutputSections specified by OVERWRITE_SECTIONS.
360  SmallVector<OutputDesc *, 0> overwriteSections;
361
362  // Sections that will be warned/errored by --orphan-handling.
363  SmallVector<const InputSectionBase *, 0> orphanSections;
364};
365
366LLVM_LIBRARY_VISIBILITY extern std::unique_ptr<LinkerScript> script;
367
368} // end namespace lld::elf
369
370#endif // LLD_ELF_LINKER_SCRIPT_H
371