LinkerScript.h revision 321369
1//===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2//
3//                             The LLVM Linker
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef LLD_ELF_LINKER_SCRIPT_H
11#define LLD_ELF_LINKER_SCRIPT_H
12
#include "Config.h"
#include "Strings.h"
#include "Writer.h"
#include "lld/Core/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
27
28namespace lld {
29namespace elf {
30
// Forward declarations. Each type is declared exactly once, in alphabetical
// order (the original list declared InputSectionBase twice).
class DefinedCommon;
class InputSection;
class InputSectionBase;
class OutputSection;
class OutputSectionFactory;
class SectionBase;
class SymbolBody;
39
40struct ExprValue {
41  SectionBase *Sec;
42  uint64_t Val;
43  bool ForceAbsolute;
44  uint64_t Alignment = 1;
45  std::string Loc;
46
47  ExprValue(SectionBase *Sec, bool ForceAbsolute, uint64_t Val,
48            const Twine &Loc)
49      : Sec(Sec), Val(Val), ForceAbsolute(ForceAbsolute), Loc(Loc.str()) {}
50  ExprValue(SectionBase *Sec, uint64_t Val, const Twine &Loc)
51      : ExprValue(Sec, false, Val, Loc) {}
52  ExprValue(uint64_t Val) : ExprValue(nullptr, Val, "") {}
53  bool isAbsolute() const { return ForceAbsolute || Sec == nullptr; }
54  uint64_t getValue() const;
55  uint64_t getSecAddr() const;
56};
57
58// This represents an expression in the linker script.
59// ScriptParser::readExpr reads an expression and returns an Expr.
60// Later, we evaluate the expression by calling the function.
61typedef std::function<ExprValue()> Expr;
62
// Kind tags used to implement the linker script SECTIONS command.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
// Enumerators keep their implicit values 0..4 in declaration order.
enum SectionsCommandKind {
  // . = expr or <sym> = expr
  AssignmentKind,
  OutputSectionKind,
  InputSectionKind,
  // ASSERT(expr)
  AssertKind,
  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
  BytesDataKind
};
72
// Common base of the command structs in this header. Kind stores the
// integer tag passed by the derived struct's constructor (the derived
// structs visible here pass a SectionsCommandKind enumerator).
struct BaseCommand {
  // explicit: an arbitrary int must not silently convert to a BaseCommand.
  // Direct initialization, as used by the derived structs, is unaffected.
  explicit BaseCommand(int K) : Kind(K) {}
  int Kind;
};
77
78// This represents ". = <expr>" or "<symbol> = <expr>".
79struct SymbolAssignment : BaseCommand {
80  SymbolAssignment(StringRef Name, Expr E, std::string Loc)
81      : BaseCommand(AssignmentKind), Name(Name), Expression(E), Location(Loc) {}
82
83  static bool classof(const BaseCommand *C);
84
85  // The LHS of an expression. Name is either a symbol name or ".".
86  StringRef Name;
87  SymbolBody *Sym = nullptr;
88
89  // The RHS of an expression.
90  Expr Expression;
91
92  // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
93  bool Provide = false;
94  bool Hidden = false;
95
96  // Holds file name and line number for error reporting.
97  std::string Location;
98};
99
// Linker scripts allow additional constraints to be put on output sections.
// An output section marked ONLY_IF_RO is created only if its input sections
// are read-only; likewise, one marked ONLY_IF_RW is created only if all of
// its input sections are RW.
enum class ConstraintKind {
  NoConstraint,
  ReadOnly,
  ReadWrite
};
105
// Describes the location and size of one region of target memory.
// Instances are created by parsing the MEMORY command.
//
// This is deliberately a plain aggregate with no constructors and no member
// initializers, so it can be brace-initialized field by field in
// declaration order.
struct MemoryRegion {
  std::string Name;
  uint64_t Origin;
  uint64_t Length;
  // NOTE(review): presumably the attribute flags a section must have
  // (Flags) and must not have (NegFlags) -- confirm against the parser.
  uint32_t Flags;
  uint32_t NegFlags;
};
116
117struct OutputSectionCommand : BaseCommand {
118  OutputSectionCommand(StringRef Name)
119      : BaseCommand(OutputSectionKind), Name(Name) {}
120
121  static bool classof(const BaseCommand *C);
122
123  OutputSection *Sec = nullptr;
124  MemoryRegion *MemRegion = nullptr;
125  StringRef Name;
126  Expr AddrExpr;
127  Expr AlignExpr;
128  Expr LMAExpr;
129  Expr SubalignExpr;
130  std::vector<BaseCommand *> Commands;
131  std::vector<StringRef> Phdrs;
132  llvm::Optional<uint32_t> Filler;
133  ConstraintKind Constraint = ConstraintKind::NoConstraint;
134  std::string Location;
135  std::string MemoryRegionName;
136  bool Noload = false;
137
138  template <class ELFT> void finalize();
139  template <class ELFT> void writeTo(uint8_t *Buf);
140  template <class ELFT> void maybeCompress();
141  uint32_t getFiller();
142
143  void sort(std::function<int(InputSectionBase *S)> Order);
144  void sortInitFini();
145  void sortCtorsDtors();
146};
147
148// This struct represents one section match pattern in SECTIONS() command.
149// It can optionally have negative match pattern for EXCLUDED_FILE command.
150// Also it may be surrounded with SORT() command, so contains sorting rules.
151struct SectionPattern {
152  SectionPattern(StringMatcher &&Pat1, StringMatcher &&Pat2)
153      : ExcludedFilePat(Pat1), SectionPat(Pat2) {}
154
155  StringMatcher ExcludedFilePat;
156  StringMatcher SectionPat;
157  SortSectionPolicy SortOuter;
158  SortSectionPolicy SortInner;
159};
160
161struct InputSectionDescription : BaseCommand {
162  InputSectionDescription(StringRef FilePattern)
163      : BaseCommand(InputSectionKind), FilePat(FilePattern) {}
164
165  static bool classof(const BaseCommand *C);
166
167  StringMatcher FilePat;
168
169  // Input sections that matches at least one of SectionPatterns
170  // will be associated with this InputSectionDescription.
171  std::vector<SectionPattern> SectionPatterns;
172
173  std::vector<InputSection *> Sections;
174};
175
176// Represents an ASSERT().
177struct AssertCommand : BaseCommand {
178  AssertCommand(Expr E) : BaseCommand(AssertKind), Expression(E) {}
179
180  static bool classof(const BaseCommand *C);
181
182  Expr Expression;
183};
184
185// Represents BYTE(), SHORT(), LONG(), or QUAD().
186struct BytesDataCommand : BaseCommand {
187  BytesDataCommand(Expr E, unsigned Size)
188      : BaseCommand(BytesDataKind), Expression(E), Size(Size) {}
189
190  static bool classof(const BaseCommand *C);
191
192  Expr Expression;
193  unsigned Offset;
194  unsigned Size;
195};
196
197struct PhdrsCommand {
198  StringRef Name;
199  unsigned Type;
200  bool HasFilehdr;
201  bool HasPhdrs;
202  unsigned Flags;
203  Expr LMAExpr;
204};
205
206// ScriptConfiguration holds linker script parse results.
207struct ScriptConfiguration {
208  // Used to assign addresses to sections.
209  std::vector<BaseCommand *> Commands;
210
211  // Used to assign sections to headers.
212  std::vector<PhdrsCommand> PhdrsCommands;
213
214  bool HasSections = false;
215
216  // List of section patterns specified with KEEP commands. They will
217  // be kept even if they are unused and --gc-sections is specified.
218  std::vector<InputSectionDescription *> KeptSections;
219
220  // A map from memory region name to a memory region descriptor.
221  llvm::DenseMap<llvm::StringRef, MemoryRegion> MemoryRegions;
222
223  // A list of symbols referenced by the script.
224  std::vector<llvm::StringRef> ReferencedSymbols;
225};
226
227class LinkerScript final {
228  // Temporary state used in processCommands() and assignAddresses()
229  // that must be reinitialized for each call to the above functions, and must
230  // not be used outside of the scope of a call to the above functions.
231  struct AddressState {
232    uint64_t ThreadBssOffset = 0;
233    OutputSection *OutSec = nullptr;
234    MemoryRegion *MemRegion = nullptr;
235    llvm::DenseMap<const MemoryRegion *, uint64_t> MemRegionOffset;
236    std::function<uint64_t()> LMAOffset;
237    AddressState(const ScriptConfiguration &Opt);
238  };
239  llvm::DenseMap<OutputSection *, OutputSectionCommand *> SecToCommand;
240  llvm::DenseMap<StringRef, OutputSectionCommand *> NameToOutputSectionCommand;
241
242  void assignSymbol(SymbolAssignment *Cmd, bool InSec);
243  void setDot(Expr E, const Twine &Loc, bool InSec);
244
245  std::vector<InputSection *>
246  computeInputSections(const InputSectionDescription *);
247
248  std::vector<InputSectionBase *>
249  createInputSectionList(OutputSectionCommand &Cmd);
250
251  std::vector<size_t> getPhdrIndices(OutputSectionCommand *Cmd);
252  size_t getPhdrIndex(const Twine &Loc, StringRef PhdrName);
253
254  MemoryRegion *findMemoryRegion(OutputSectionCommand *Cmd);
255
256  void switchTo(OutputSection *Sec);
257  uint64_t advance(uint64_t Size, unsigned Align);
258  void output(InputSection *Sec);
259  void process(BaseCommand &Base);
260
261  AddressState *CurAddressState = nullptr;
262  OutputSection *Aether;
263
264  uint64_t Dot;
265
266public:
267  bool ErrorOnMissingSection = false;
268  OutputSectionCommand *createOutputSectionCommand(StringRef Name,
269                                                   StringRef Location);
270  OutputSectionCommand *getOrCreateOutputSectionCommand(StringRef Name);
271
272  OutputSectionCommand *getCmd(OutputSection *Sec) const;
273  bool hasPhdrsCommands() { return !Opt.PhdrsCommands.empty(); }
274  uint64_t getDot() { return Dot; }
275  void discard(ArrayRef<InputSectionBase *> V);
276
277  ExprValue getSymbolValue(const Twine &Loc, StringRef S);
278  bool isDefined(StringRef S);
279
280  void fabricateDefaultCommands();
281  void addOrphanSections(OutputSectionFactory &Factory);
282  void removeEmptyCommands();
283  void adjustSectionsBeforeSorting();
284  void adjustSectionsAfterSorting();
285
286  std::vector<PhdrEntry> createPhdrs();
287  bool ignoreInterpSection();
288
289  bool shouldKeep(InputSectionBase *S);
290  void assignOffsets(OutputSectionCommand *Cmd);
291  void processNonSectionCommands();
292  void assignAddresses();
293  void allocateHeaders(std::vector<PhdrEntry> &Phdrs);
294  void addSymbol(SymbolAssignment *Cmd);
295  void processCommands(OutputSectionFactory &Factory);
296
297  // Parsed linker script configurations are set to this struct.
298  ScriptConfiguration Opt;
299};
300
301extern LinkerScript *Script;
302
303} // end namespace elf
304} // end namespace lld
305
306#endif // LLD_ELF_LINKER_SCRIPT_H
307