//===- LinkerScript.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLD_ELF_LINKER_SCRIPT_H
#define LLD_ELF_LINKER_SCRIPT_H

#include "Config.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>

28303239Sdimnamespace lld {
29303239Sdimnamespace elf {
30303239Sdim
31327952Sdimclass Defined;
32344779Sdimclass InputSection;
33321369Sdimclass InputSectionBase;
34321369Sdimclass OutputSection;
35321369Sdimclass SectionBase;
36344779Sdimclass Symbol;
37344779Sdimclass ThunkSection;
38314564Sdim
39327952Sdim// This represents an r-value in the linker script.
40321369Sdimstruct ExprValue {
41353358Sdim  ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
42353358Sdim            const Twine &loc)
43353358Sdim      : sec(sec), forceAbsolute(forceAbsolute), val(val), loc(loc.str()) {}
44327952Sdim
45353358Sdim  ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}
46327952Sdim
47353358Sdim  bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
48321369Sdim  uint64_t getValue() const;
49321369Sdim  uint64_t getSecAddr() const;
50327952Sdim  uint64_t getSectionOffset() const;
51327952Sdim
52327952Sdim  // If a value is relative to a section, it has a non-null Sec.
53353358Sdim  SectionBase *sec;
54327952Sdim
55327952Sdim  // True if this expression is enclosed in ABSOLUTE().
56327952Sdim  // This flag affects the return value of getValue().
57353358Sdim  bool forceAbsolute;
58327952Sdim
59353358Sdim  uint64_t val;
60353358Sdim  uint64_t alignment = 1;
61327952Sdim
62327952Sdim  // Original source location. Used for error messages.
63353358Sdim  std::string loc;
64314564Sdim};
65314564Sdim
66321369Sdim// This represents an expression in the linker script.
67321369Sdim// ScriptParser::readExpr reads an expression and returns an Expr.
68321369Sdim// Later, we evaluate the expression by calling the function.
69353358Sdimusing Expr = std::function<ExprValue()>;
70303239Sdim
// This enum is used to implement linker script SECTIONS command.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
//
// The enumerators are stored in BaseCommand::kind (an int) and compared by
// the classof() functions of the command structs, so this stays an unscoped
// enum.
enum SectionsCommandKind {
  AssignmentKind, // . = expr or <sym> = expr
  OutputSectionKind,
  InputSectionKind,
  ByteKind // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
};

// Common base of the command structs in this header. It carries only a kind
// tag, which the derived structs' classof() functions compare against a
// SectionsCommandKind enumerator.
struct BaseCommand {
  // Stores `k` as the kind tag.
  BaseCommand(int k) : kind(k) {}

  // Kind tag set at construction.
  int kind;
};

85314564Sdim// This represents ". = <expr>" or "<symbol> = <expr>".
86314564Sdimstruct SymbolAssignment : BaseCommand {
87353358Sdim  SymbolAssignment(StringRef name, Expr e, std::string loc)
88353358Sdim      : BaseCommand(AssignmentKind), name(name), expression(e), location(loc) {}
89303239Sdim
90353358Sdim  static bool classof(const BaseCommand *c) {
91353358Sdim    return c->kind == AssignmentKind;
92327952Sdim  }
93314564Sdim
94314564Sdim  // The LHS of an expression. Name is either a symbol name or ".".
95353358Sdim  StringRef name;
96353358Sdim  Defined *sym = nullptr;
97314564Sdim
98314564Sdim  // The RHS of an expression.
99353358Sdim  Expr expression;
100314564Sdim
101314564Sdim  // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
102353358Sdim  bool provide = false;
103353358Sdim  bool hidden = false;
104321369Sdim
105321369Sdim  // Holds file name and line number for error reporting.
106353358Sdim  std::string location;
107341825Sdim
108341825Sdim  // A string representation of this command. We use this for -Map.
109353358Sdim  std::string commandString;
110341825Sdim
111341825Sdim  // Address of this assignment command.
112353358Sdim  unsigned addr;
113341825Sdim
114341825Sdim  // Size of this assignment command. This is usually 0, but if
115341825Sdim  // you move '.' this may be greater than 0.
116353358Sdim  unsigned size;
117303239Sdim};
118303239Sdim
// Linker scripts allow additional constraints to be put on output sections.
// If an output section is marked as ONLY_IF_RO, the section is created
// only if its input sections are read-only. Likewise, an output section
// with ONLY_IF_RW is created if all input sections are RW.
enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite };

125321369Sdim// This struct is used to represent the location and size of regions of
126321369Sdim// target memory. Instances of the struct are created by parsing the
127321369Sdim// MEMORY command.
128321369Sdimstruct MemoryRegion {
129353358Sdim  MemoryRegion(StringRef name, uint64_t origin, uint64_t length, uint32_t flags,
130353358Sdim               uint32_t negFlags)
131353358Sdim      : name(name), origin(origin), length(length), flags(flags),
132353358Sdim        negFlags(negFlags) {}
133328544Semaste
134353358Sdim  std::string name;
135353358Sdim  uint64_t origin;
136353358Sdim  uint64_t length;
137353358Sdim  uint32_t flags;
138353358Sdim  uint32_t negFlags;
139353358Sdim  uint64_t curPos = 0;
140321369Sdim};
141321369Sdim
142314564Sdim// This struct represents one section match pattern in SECTIONS() command.
143314564Sdim// It can optionally have negative match pattern for EXCLUDED_FILE command.
144314564Sdim// Also it may be surrounded with SORT() command, so contains sorting rules.
145314564Sdimstruct SectionPattern {
146353358Sdim  SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
147353358Sdim      : excludedFilePat(pat1), sectionPat(pat2),
148353358Sdim        sortOuter(SortSectionPolicy::Default),
149353358Sdim        sortInner(SortSectionPolicy::Default) {}
150314564Sdim
151353358Sdim  StringMatcher excludedFilePat;
152353358Sdim  StringMatcher sectionPat;
153353358Sdim  SortSectionPolicy sortOuter;
154353358Sdim  SortSectionPolicy sortInner;
155314564Sdim};
156314564Sdim
157314564Sdimstruct InputSectionDescription : BaseCommand {
158353358Sdim  InputSectionDescription(StringRef filePattern)
159353358Sdim      : BaseCommand(InputSectionKind), filePat(filePattern) {}
160314564Sdim
161353358Sdim  static bool classof(const BaseCommand *c) {
162353358Sdim    return c->kind == InputSectionKind;
163327952Sdim  }
164314564Sdim
165353358Sdim  StringMatcher filePat;
166314564Sdim
167314564Sdim  // Input sections that matches at least one of SectionPatterns
168314564Sdim  // will be associated with this InputSectionDescription.
169353358Sdim  std::vector<SectionPattern> sectionPatterns;
170314564Sdim
171360784Sdim  // Includes InputSections and MergeInputSections. Used temporarily during
172360784Sdim  // assignment of input sections to output sections.
173360784Sdim  std::vector<InputSectionBase *> sectionBases;
174360784Sdim
175360784Sdim  // Used after the finalizeInputSections() pass. MergeInputSections have been
176360784Sdim  // merged into MergeSyntheticSections.
177353358Sdim  std::vector<InputSection *> sections;
178327952Sdim
179327952Sdim  // Temporary record of synthetic ThunkSection instances and the pass that
180327952Sdim  // they were created in. This is used to insert newly created ThunkSections
181327952Sdim  // into Sections at the end of a createThunks() pass.
182353358Sdim  std::vector<std::pair<ThunkSection *, uint32_t>> thunkSections;
183314564Sdim};
184314564Sdim
185314564Sdim// Represents BYTE(), SHORT(), LONG(), or QUAD().
186327952Sdimstruct ByteCommand : BaseCommand {
187353358Sdim  ByteCommand(Expr e, unsigned size, std::string commandString)
188353358Sdim      : BaseCommand(ByteKind), commandString(commandString), expression(e),
189353358Sdim        size(size) {}
190314564Sdim
191353358Sdim  static bool classof(const BaseCommand *c) { return c->kind == ByteKind; }
192314564Sdim
193341825Sdim  // Keeps string representing the command. Used for -Map" is perhaps better.
194353358Sdim  std::string commandString;
195341825Sdim
196353358Sdim  Expr expression;
197341825Sdim
198341825Sdim  // This is just an offset of this assignment command in the output section.
199353358Sdim  unsigned offset;
200341825Sdim
201341825Sdim  // Size of this data command.
202353358Sdim  unsigned size;
203314564Sdim};
204314564Sdim
205314564Sdimstruct PhdrsCommand {
206353358Sdim  StringRef name;
207353358Sdim  unsigned type = llvm::ELF::PT_NULL;
208353358Sdim  bool hasFilehdr = false;
209353358Sdim  bool hasPhdrs = false;
210353358Sdim  llvm::Optional<unsigned> flags;
211353358Sdim  Expr lmaExpr = nullptr;
212314564Sdim};
213314564Sdim
214321369Sdimclass LinkerScript final {
215327952Sdim  // Temporary state used in processSectionCommands() and assignAddresses()
216321369Sdim  // that must be reinitialized for each call to the above functions, and must
217321369Sdim  // not be used outside of the scope of a call to the above functions.
218321369Sdim  struct AddressState {
219327952Sdim    AddressState();
220353358Sdim    uint64_t threadBssOffset = 0;
221353358Sdim    OutputSection *outSec = nullptr;
222353358Sdim    MemoryRegion *memRegion = nullptr;
223353358Sdim    MemoryRegion *lmaRegion = nullptr;
224353358Sdim    uint64_t lmaOffset = 0;
225321369Sdim  };
226303239Sdim
227353358Sdim  llvm::DenseMap<StringRef, OutputSection *> nameToOutputSection;
228327952Sdim
229353358Sdim  void addSymbol(SymbolAssignment *cmd);
230353358Sdim  void assignSymbol(SymbolAssignment *cmd, bool inSec);
231353358Sdim  void setDot(Expr e, const Twine &loc, bool inSec);
232353358Sdim  void expandOutputSection(uint64_t size);
233353358Sdim  void expandMemoryRegions(uint64_t size);
234303239Sdim
235360784Sdim  std::vector<InputSectionBase *>
236341825Sdim  computeInputSections(const InputSectionDescription *);
237321369Sdim
238360784Sdim  std::vector<InputSectionBase *> createInputSectionList(OutputSection &cmd);
239321369Sdim
240353358Sdim  std::vector<size_t> getPhdrIndices(OutputSection *sec);
241321369Sdim
242353358Sdim  MemoryRegion *findMemoryRegion(OutputSection *sec);
243321369Sdim
244353358Sdim  void switchTo(OutputSection *sec);
245353358Sdim  uint64_t advance(uint64_t size, unsigned align);
246353358Sdim  void output(InputSection *sec);
247321369Sdim
248353358Sdim  void assignOffsets(OutputSection *sec);
249327952Sdim
250327952Sdim  // Ctx captures the local AddressState and makes it accessible
251327952Sdim  // deliberately. This is needed as there are some cases where we cannot just
252327952Sdim  // thread the current state through to a lambda function created by the
253327952Sdim  // script parser.
254327952Sdim  // This should remain a plain pointer as its lifetime is smaller than
255327952Sdim  // LinkerScript.
256353358Sdim  AddressState *ctx = nullptr;
257327952Sdim
258353358Sdim  OutputSection *aether;
259321369Sdim
260353358Sdim  uint64_t dot;
261321369Sdim
262303239Sdimpublic:
263353358Sdim  OutputSection *createOutputSection(StringRef name, StringRef location);
264353358Sdim  OutputSection *getOrCreateOutputSection(StringRef name);
265314564Sdim
266353358Sdim  bool hasPhdrsCommands() { return !phdrsCommands.empty(); }
267353358Sdim  uint64_t getDot() { return dot; }
268360784Sdim  void discard(InputSectionBase *s);
269321369Sdim
270353358Sdim  ExprValue getSymbolValue(StringRef name, const Twine &loc);
271321369Sdim
272327952Sdim  void addOrphanSections();
273314564Sdim  void adjustSectionsBeforeSorting();
274314564Sdim  void adjustSectionsAfterSorting();
275314564Sdim
276327952Sdim  std::vector<PhdrEntry *> createPhdrs();
277327952Sdim  bool needsInterpSection();
278314564Sdim
279353358Sdim  bool shouldKeep(InputSectionBase *s);
280360784Sdim  const Defined *assignAddresses();
281353358Sdim  void allocateHeaders(std::vector<PhdrEntry *> &phdrs);
282327952Sdim  void processSectionCommands();
283360784Sdim  void processSymbolAssignments();
284341825Sdim  void declareSymbols();
285303239Sdim
286341825Sdim  // Used to handle INSERT AFTER statements.
287341825Sdim  void processInsertCommands();
288341825Sdim
289327952Sdim  // SECTIONS command list.
290353358Sdim  std::vector<BaseCommand *> sectionCommands;
291327952Sdim
292327952Sdim  // PHDRS command list.
293353358Sdim  std::vector<PhdrsCommand> phdrsCommands;
294327952Sdim
295353358Sdim  bool hasSectionsCommand = false;
296353358Sdim  bool errorOnMissingSection = false;
297327952Sdim
298327952Sdim  // List of section patterns specified with KEEP commands. They will
299327952Sdim  // be kept even if they are unused and --gc-sections is specified.
300353358Sdim  std::vector<InputSectionDescription *> keptSections;
301327952Sdim
302327952Sdim  // A map from memory region name to a memory region descriptor.
303353358Sdim  llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions;
304327952Sdim
305327952Sdim  // A list of symbols referenced by the script.
306353358Sdim  std::vector<llvm::StringRef> referencedSymbols;
307341825Sdim
308341825Sdim  // Used to implement INSERT [AFTER|BEFORE]. Contains commands that need
309341825Sdim  // to be inserted into SECTIONS commands list.
310353358Sdim  llvm::DenseMap<StringRef, std::vector<BaseCommand *>> insertAfterCommands;
311353358Sdim  llvm::DenseMap<StringRef, std::vector<BaseCommand *>> insertBeforeCommands;
312303239Sdim};
313303239Sdim
314353358Sdimextern LinkerScript *script;
315303239Sdim
316314564Sdim} // end namespace elf
317314564Sdim} // end namespace lld
318314564Sdim
319314564Sdim#endif // LLD_ELF_LINKER_SCRIPT_H
320