1//===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_ELF_LINKER_SCRIPT_H
10#define LLD_ELF_LINKER_SCRIPT_H
11
#include "Config.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <utility>
25
26namespace lld::elf {
27
// Forward declarations. This header only refers to these types through
// pointers and references, so their full definitions are not needed here.
class Defined;
class InputFile;
class InputSection;
class InputSectionBase;
class OutputSection;
class SectionBase;
class ThunkSection;
struct OutputDesc;
36
37// This represents an r-value in the linker script.
38struct ExprValue {
39  ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
40            const Twine &loc)
41      : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {}
42
43  ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}
44
45  bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
46  uint64_t getValue() const;
47  uint64_t getSecAddr() const;
48  uint64_t getSectionOffset() const;
49
50  // If a value is relative to a section, it has a non-null Sec.
51  SectionBase *sec;
52
53  uint64_t val;
54  uint64_t alignment = 1;
55
56  // The original st_type if the expression represents a symbol. Any operation
57  // resets type to STT_NOTYPE.
58  uint8_t type = llvm::ELF::STT_NOTYPE;
59
60  // True if this expression is enclosed in ABSOLUTE().
61  // This flag affects the return value of getValue().
62  bool forceAbsolute;
63
64  // Original source location. Used for error messages.
65  std::string loc;
66};
67
68// This represents an expression in the linker script.
69// ScriptParser::readExpr reads an expression and returns an Expr.
70// Later, we evaluate the expression by calling the function.
71using Expr = std::function<ExprValue()>;
72
// Discriminator for the commands of a linker script SECTIONS command.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
//
// The value is stored in SectionCommand::kind and tested by the classof()
// functions of the command types.
enum SectionsCommandKind {
  // ". = expr" or "<sym> = expr".
  AssignmentKind,
  OutputSectionKind,
  InputSectionKind,
  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr).
  ByteKind
};
81
// Common base of the SECTIONS command types. It only records which kind of
// command the object is, as a SectionsCommandKind value stored in an int.
struct SectionCommand {
  SectionCommand(int k) : kind{k} {}

  // Kind tag; compared against SectionsCommandKind values by classof().
  int kind;
};
86
87// This represents ". = <expr>" or "<symbol> = <expr>".
88struct SymbolAssignment : SectionCommand {
89  SymbolAssignment(StringRef name, Expr e, unsigned symOrder, std::string loc)
90      : SectionCommand(AssignmentKind), name(name), expression(e),
91        symOrder(symOrder), location(loc) {}
92
93  static bool classof(const SectionCommand *c) {
94    return c->kind == AssignmentKind;
95  }
96
97  // The LHS of an expression. Name is either a symbol name or ".".
98  StringRef name;
99  Defined *sym = nullptr;
100
101  // The RHS of an expression.
102  Expr expression;
103
104  // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
105  bool provide = false;
106  bool hidden = false;
107
108  // This assignment references DATA_SEGMENT_RELRO_END.
109  bool dataSegmentRelroEnd = false;
110
111  unsigned symOrder;
112
113  // Holds file name and line number for error reporting.
114  std::string location;
115
116  // A string representation of this command. We use this for -Map.
117  std::string commandString;
118
119  // Address of this assignment command.
120  uint64_t addr;
121
122  // Size of this assignment command. This is usually 0, but if
123  // you move '.' this may be greater than 0.
124  uint64_t size;
125};
126
// Additional constraints a linker script may put on an output section.
// An output section marked ONLY_IF_RO is created only if its input sections
// are read-only; likewise, one marked ONLY_IF_RW is created only if all of
// its input sections are RW.
enum class ConstraintKind {
  NoConstraint,
  ReadOnly,
  ReadWrite,
};
132
133// This struct is used to represent the location and size of regions of
134// target memory. Instances of the struct are created by parsing the
135// MEMORY command.
136struct MemoryRegion {
137  MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags,
138               uint32_t invFlags, uint32_t negFlags, uint32_t negInvFlags)
139      : name(std::string(name)), origin(origin), length(length), flags(flags),
140        invFlags(invFlags), negFlags(negFlags), negInvFlags(negInvFlags) {}
141
142  std::string name;
143  Expr origin;
144  Expr length;
145  // A section can be assigned to the region if any of these ELF section flags
146  // are set...
147  uint32_t flags;
148  // ... or any of these flags are not set.
149  // For example, the memory region attribute "r" maps to SHF_WRITE.
150  uint32_t invFlags;
151  // A section cannot be assigned to the region if any of these ELF section
152  // flags are set...
153  uint32_t negFlags;
154  // ... or any of these flags are not set.
155  // For example, the memory region attribute "!r" maps to SHF_WRITE.
156  uint32_t negInvFlags;
157  uint64_t curPos = 0;
158
159  uint64_t getOrigin() const { return origin().getValue(); }
160  uint64_t getLength() const { return length().getValue(); }
161
162  bool compatibleWith(uint32_t secFlags) const {
163    if ((secFlags & negFlags) || (~secFlags & negInvFlags))
164      return false;
165    return (secFlags & flags) || (~secFlags & invFlags);
166  }
167};
168
169// This struct represents one section match pattern in SECTIONS() command.
170// It can optionally have negative match pattern for EXCLUDED_FILE command.
171// Also it may be surrounded with SORT() command, so contains sorting rules.
172class SectionPattern {
173  StringMatcher excludedFilePat;
174
175  // Cache of the most recent input argument and result of excludesFile().
176  mutable std::optional<std::pair<const InputFile *, bool>> excludesFileCache;
177
178public:
179  SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
180      : excludedFilePat(pat1), sectionPat(pat2),
181        sortOuter(SortSectionPolicy::Default),
182        sortInner(SortSectionPolicy::Default) {}
183
184  bool excludesFile(const InputFile *file) const;
185
186  StringMatcher sectionPat;
187  SortSectionPolicy sortOuter;
188  SortSectionPolicy sortInner;
189};
190
191class InputSectionDescription : public SectionCommand {
192  SingleStringMatcher filePat;
193
194  // Cache of the most recent input argument and result of matchesFile().
195  mutable std::optional<std::pair<const InputFile *, bool>> matchesFileCache;
196
197public:
198  InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
199                          uint64_t withoutFlags = 0)
200      : SectionCommand(InputSectionKind), filePat(filePattern),
201        withFlags(withFlags), withoutFlags(withoutFlags) {}
202
203  static bool classof(const SectionCommand *c) {
204    return c->kind == InputSectionKind;
205  }
206
207  bool matchesFile(const InputFile *file) const;
208
209  // Input sections that matches at least one of SectionPatterns
210  // will be associated with this InputSectionDescription.
211  SmallVector<SectionPattern, 0> sectionPatterns;
212
213  // Includes InputSections and MergeInputSections. Used temporarily during
214  // assignment of input sections to output sections.
215  SmallVector<InputSectionBase *, 0> sectionBases;
216
217  // Used after the finalizeInputSections() pass. MergeInputSections have been
218  // merged into MergeSyntheticSections.
219  SmallVector<InputSection *, 0> sections;
220
221  // Temporary record of synthetic ThunkSection instances and the pass that
222  // they were created in. This is used to insert newly created ThunkSections
223  // into Sections at the end of a createThunks() pass.
224  SmallVector<std::pair<ThunkSection *, uint32_t>, 0> thunkSections;
225
226  // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
227  uint64_t withFlags;
228  uint64_t withoutFlags;
229};
230
231// Represents BYTE(), SHORT(), LONG(), or QUAD().
232struct ByteCommand : SectionCommand {
233  ByteCommand(Expr e, unsigned size, std::string commandString)
234      : SectionCommand(ByteKind), commandString(commandString), expression(e),
235        size(size) {}
236
237  static bool classof(const SectionCommand *c) { return c->kind == ByteKind; }
238
239  // Keeps string representing the command. Used for -Map" is perhaps better.
240  std::string commandString;
241
242  Expr expression;
243
244  // This is just an offset of this assignment command in the output section.
245  unsigned offset;
246
247  // Size of this data command.
248  unsigned size;
249};
250
251struct InsertCommand {
252  SmallVector<StringRef, 0> names;
253  bool isAfter;
254  StringRef where;
255};
256
257struct PhdrsCommand {
258  StringRef name;
259  unsigned type = llvm::ELF::PT_NULL;
260  bool hasFilehdr = false;
261  bool hasPhdrs = false;
262  std::optional<unsigned> flags;
263  Expr lmaExpr = nullptr;
264};
265
266class LinkerScript final {
267  // Temporary state used in processSectionCommands() and assignAddresses()
268  // that must be reinitialized for each call to the above functions, and must
269  // not be used outside of the scope of a call to the above functions.
270  struct AddressState {
271    AddressState();
272    OutputSection *outSec = nullptr;
273    MemoryRegion *memRegion = nullptr;
274    MemoryRegion *lmaRegion = nullptr;
275    uint64_t lmaOffset = 0;
276    uint64_t tbssAddr = 0;
277  };
278
279  llvm::DenseMap<llvm::CachedHashStringRef, OutputDesc *> nameToOutputSection;
280
281  void addSymbol(SymbolAssignment *cmd);
282  void assignSymbol(SymbolAssignment *cmd, bool inSec);
283  void setDot(Expr e, const Twine &loc, bool inSec);
284  void expandOutputSection(uint64_t size);
285  void expandMemoryRegions(uint64_t size);
286
287  SmallVector<InputSectionBase *, 0>
288  computeInputSections(const InputSectionDescription *,
289                       ArrayRef<InputSectionBase *>);
290
291  SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd);
292
293  void discardSynthetic(OutputSection &);
294
295  SmallVector<size_t, 0> getPhdrIndices(OutputSection *sec);
296
297  std::pair<MemoryRegion *, MemoryRegion *>
298  findMemoryRegion(OutputSection *sec, MemoryRegion *hint);
299
300  void assignOffsets(OutputSection *sec);
301
302  // This captures the local AddressState and makes it accessible
303  // deliberately. This is needed as there are some cases where we cannot just
304  // thread the current state through to a lambda function created by the
305  // script parser.
306  // This should remain a plain pointer as its lifetime is smaller than
307  // LinkerScript.
308  AddressState *state = nullptr;
309
310  OutputSection *aether;
311
312  uint64_t dot;
313
314public:
315  OutputDesc *createOutputSection(StringRef name, StringRef location);
316  OutputDesc *getOrCreateOutputSection(StringRef name);
317
318  bool hasPhdrsCommands() { return !phdrsCommands.empty(); }
319  uint64_t getDot() { return dot; }
320  void discard(InputSectionBase &s);
321
322  ExprValue getSymbolValue(StringRef name, const Twine &loc);
323
324  void addOrphanSections();
325  void diagnoseOrphanHandling() const;
326  void diagnoseMissingSGSectionAddress() const;
327  void adjustOutputSections();
328  void adjustSectionsAfterSorting();
329
330  SmallVector<PhdrEntry *, 0> createPhdrs();
331  bool needsInterpSection();
332
333  bool shouldKeep(InputSectionBase *s);
334  const Defined *assignAddresses();
335  void allocateHeaders(SmallVector<PhdrEntry *, 0> &phdrs);
336  void processSectionCommands();
337  void processSymbolAssignments();
338  void declareSymbols();
339
340  bool isDiscarded(const OutputSection *sec) const;
341
342  // Used to handle INSERT AFTER statements.
343  void processInsertCommands();
344
345  // Describe memory region usage.
346  void printMemoryUsage(raw_ostream &os);
347
348  // Check backward location counter assignment and memory region/LMA overflows.
349  void checkFinalScriptConditions() const;
350
351  // SECTIONS command list.
352  SmallVector<SectionCommand *, 0> sectionCommands;
353
354  // PHDRS command list.
355  SmallVector<PhdrsCommand, 0> phdrsCommands;
356
357  bool hasSectionsCommand = false;
358  bool seenDataAlign = false;
359  bool seenRelroEnd = false;
360  bool errorOnMissingSection = false;
361  std::string backwardDotErr;
362
363  // List of section patterns specified with KEEP commands. They will
364  // be kept even if they are unused and --gc-sections is specified.
365  SmallVector<InputSectionDescription *, 0> keptSections;
366
367  // A map from memory region name to a memory region descriptor.
368  llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions;
369
370  // A list of symbols referenced by the script.
371  SmallVector<llvm::StringRef, 0> referencedSymbols;
372
373  // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need
374  // to be reordered.
375  SmallVector<InsertCommand, 0> insertCommands;
376
377  // OutputSections specified by OVERWRITE_SECTIONS.
378  SmallVector<OutputDesc *, 0> overwriteSections;
379
380  // Sections that will be warned/errored by --orphan-handling.
381  SmallVector<const InputSectionBase *, 0> orphanSections;
382};
383
384LLVM_LIBRARY_VISIBILITY extern std::unique_ptr<LinkerScript> script;
385
386} // end namespace lld::elf
387
388#endif // LLD_ELF_LINKER_SCRIPT_H
389