LinkerScript.h revision 328544
1303239Sdim//===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2303239Sdim//
3303239Sdim//                             The LLVM Linker
4303239Sdim//
5303239Sdim// This file is distributed under the University of Illinois Open Source
6303239Sdim// License. See LICENSE.TXT for details.
7303239Sdim//
8303239Sdim//===----------------------------------------------------------------------===//
9303239Sdim
10303239Sdim#ifndef LLD_ELF_LINKER_SCRIPT_H
11303239Sdim#define LLD_ELF_LINKER_SCRIPT_H
12303239Sdim
#include "Config.h"
#include "Strings.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <utility>
#include <vector>
28303239Sdim
29303239Sdimnamespace lld {
30303239Sdimnamespace elf {
31303239Sdim
// Forward declarations. The full definitions are not part of this header.
class Defined;
class Symbol;
class InputSectionBase;
class InputSection;
class OutputSection;
class SectionBase;
39314564Sdim
40327952Sdim// This represents an r-value in the linker script.
41321369Sdimstruct ExprValue {
42321369Sdim  ExprValue(SectionBase *Sec, bool ForceAbsolute, uint64_t Val,
43321369Sdim            const Twine &Loc)
44327952Sdim      : Sec(Sec), ForceAbsolute(ForceAbsolute), Val(Val), Loc(Loc.str()) {}
45327952Sdim
46327952Sdim  ExprValue(uint64_t Val) : ExprValue(nullptr, false, Val, "") {}
47327952Sdim
48321369Sdim  bool isAbsolute() const { return ForceAbsolute || Sec == nullptr; }
49321369Sdim  uint64_t getValue() const;
50321369Sdim  uint64_t getSecAddr() const;
51327952Sdim  uint64_t getSectionOffset() const;
52327952Sdim
53327952Sdim  // If a value is relative to a section, it has a non-null Sec.
54327952Sdim  SectionBase *Sec;
55327952Sdim
56327952Sdim  // True if this expression is enclosed in ABSOLUTE().
57327952Sdim  // This flag affects the return value of getValue().
58327952Sdim  bool ForceAbsolute;
59327952Sdim
60327952Sdim  uint64_t Val;
61327952Sdim  uint64_t Alignment = 1;
62327952Sdim
63327952Sdim  // Original source location. Used for error messages.
64327952Sdim  std::string Loc;
65314564Sdim};
66314564Sdim
67321369Sdim// This represents an expression in the linker script.
68321369Sdim// ScriptParser::readExpr reads an expression and returns an Expr.
69321369Sdim// Later, we evaluate the expression by calling the function.
70321369Sdimtypedef std::function<ExprValue()> Expr;
71303239Sdim
// Kind tags used to implement the linker script SECTIONS command.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
enum SectionsCommandKind {
  // . = expr or <sym> = expr
  AssignmentKind = 0,
  OutputSectionKind = 1,
  InputSectionKind = 2,
  // ASSERT(expr)
  AssertKind = 3,
  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
  ByteKind = 4
};
81303239Sdim
// Common base of the command structs in this header. It stores only the kind
// tag that the derived structs' classof() functions compare against.
struct BaseCommand {
  int Kind;

  BaseCommand(int K) : Kind(K) {}
};
86303239Sdim
87314564Sdim// This represents ". = <expr>" or "<symbol> = <expr>".
88314564Sdimstruct SymbolAssignment : BaseCommand {
89321369Sdim  SymbolAssignment(StringRef Name, Expr E, std::string Loc)
90321369Sdim      : BaseCommand(AssignmentKind), Name(Name), Expression(E), Location(Loc) {}
91303239Sdim
92327952Sdim  static bool classof(const BaseCommand *C) {
93327952Sdim    return C->Kind == AssignmentKind;
94327952Sdim  }
95314564Sdim
96314564Sdim  // The LHS of an expression. Name is either a symbol name or ".".
97303239Sdim  StringRef Name;
98327952Sdim  Defined *Sym = nullptr;
99314564Sdim
100314564Sdim  // The RHS of an expression.
101314564Sdim  Expr Expression;
102314564Sdim
103314564Sdim  // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
104314564Sdim  bool Provide = false;
105314564Sdim  bool Hidden = false;
106321369Sdim
107321369Sdim  // Holds file name and line number for error reporting.
108321369Sdim  std::string Location;
109303239Sdim};
110303239Sdim
// Linker scripts allow additional constraints to be put on output sections.
// An output section marked ONLY_IF_RO is created only if its input sections
// are read-only. Likewise, an output section marked ONLY_IF_RW is created
// if all of its input sections are RW.
enum class ConstraintKind {
  NoConstraint,
  ReadOnly,
  ReadWrite,
};
116314564Sdim
117321369Sdim// This struct is used to represent the location and size of regions of
118321369Sdim// target memory. Instances of the struct are created by parsing the
119321369Sdim// MEMORY command.
120321369Sdimstruct MemoryRegion {
121328544Semaste  MemoryRegion(StringRef Name, uint64_t Origin, uint64_t Length, uint32_t Flags,
122328544Semaste               uint32_t NegFlags)
123328544Semaste      : Name(Name), Origin(Origin), Length(Length), Flags(Flags),
124328544Semaste        NegFlags(NegFlags) {}
125328544Semaste
126321369Sdim  std::string Name;
127321369Sdim  uint64_t Origin;
128321369Sdim  uint64_t Length;
129321369Sdim  uint32_t Flags;
130321369Sdim  uint32_t NegFlags;
131328544Semaste  uint64_t CurPos = 0;
132321369Sdim};
133321369Sdim
134314564Sdim// This struct represents one section match pattern in SECTIONS() command.
135314564Sdim// It can optionally have negative match pattern for EXCLUDED_FILE command.
136314564Sdim// Also it may be surrounded with SORT() command, so contains sorting rules.
137314564Sdimstruct SectionPattern {
138314564Sdim  SectionPattern(StringMatcher &&Pat1, StringMatcher &&Pat2)
139314564Sdim      : ExcludedFilePat(Pat1), SectionPat(Pat2) {}
140314564Sdim
141314564Sdim  StringMatcher ExcludedFilePat;
142314564Sdim  StringMatcher SectionPat;
143314564Sdim  SortSectionPolicy SortOuter;
144314564Sdim  SortSectionPolicy SortInner;
145314564Sdim};
146314564Sdim
147327952Sdimclass ThunkSection;
148314564Sdimstruct InputSectionDescription : BaseCommand {
149314564Sdim  InputSectionDescription(StringRef FilePattern)
150314564Sdim      : BaseCommand(InputSectionKind), FilePat(FilePattern) {}
151314564Sdim
152327952Sdim  static bool classof(const BaseCommand *C) {
153327952Sdim    return C->Kind == InputSectionKind;
154327952Sdim  }
155314564Sdim
156314564Sdim  StringMatcher FilePat;
157314564Sdim
158314564Sdim  // Input sections that matches at least one of SectionPatterns
159314564Sdim  // will be associated with this InputSectionDescription.
160314564Sdim  std::vector<SectionPattern> SectionPatterns;
161314564Sdim
162321369Sdim  std::vector<InputSection *> Sections;
163327952Sdim
164327952Sdim  // Temporary record of synthetic ThunkSection instances and the pass that
165327952Sdim  // they were created in. This is used to insert newly created ThunkSections
166327952Sdim  // into Sections at the end of a createThunks() pass.
167327952Sdim  std::vector<std::pair<ThunkSection *, uint32_t>> ThunkSections;
168314564Sdim};
169314564Sdim
170314564Sdim// Represents an ASSERT().
171314564Sdimstruct AssertCommand : BaseCommand {
172314564Sdim  AssertCommand(Expr E) : BaseCommand(AssertKind), Expression(E) {}
173314564Sdim
174327952Sdim  static bool classof(const BaseCommand *C) { return C->Kind == AssertKind; }
175314564Sdim
176314564Sdim  Expr Expression;
177314564Sdim};
178314564Sdim
179314564Sdim// Represents BYTE(), SHORT(), LONG(), or QUAD().
180327952Sdimstruct ByteCommand : BaseCommand {
181327952Sdim  ByteCommand(Expr E, unsigned Size)
182327952Sdim      : BaseCommand(ByteKind), Expression(E), Size(Size) {}
183314564Sdim
184327952Sdim  static bool classof(const BaseCommand *C) { return C->Kind == ByteKind; }
185314564Sdim
186314564Sdim  Expr Expression;
187314564Sdim  unsigned Offset;
188314564Sdim  unsigned Size;
189314564Sdim};
190314564Sdim
191314564Sdimstruct PhdrsCommand {
192314564Sdim  StringRef Name;
193327952Sdim  unsigned Type = llvm::ELF::PT_NULL;
194327952Sdim  bool HasFilehdr = false;
195327952Sdim  bool HasPhdrs = false;
196327952Sdim  llvm::Optional<unsigned> Flags;
197327952Sdim  Expr LMAExpr = nullptr;
198314564Sdim};
199314564Sdim
200321369Sdimclass LinkerScript final {
201327952Sdim  // Temporary state used in processSectionCommands() and assignAddresses()
202321369Sdim  // that must be reinitialized for each call to the above functions, and must
203321369Sdim  // not be used outside of the scope of a call to the above functions.
204321369Sdim  struct AddressState {
205327952Sdim    AddressState();
206321369Sdim    uint64_t ThreadBssOffset = 0;
207321369Sdim    OutputSection *OutSec = nullptr;
208321369Sdim    MemoryRegion *MemRegion = nullptr;
209321369Sdim    std::function<uint64_t()> LMAOffset;
210321369Sdim  };
211303239Sdim
212327952Sdim  llvm::DenseMap<StringRef, OutputSection *> NameToOutputSection;
213327952Sdim
214327952Sdim  void addSymbol(SymbolAssignment *Cmd);
215321369Sdim  void assignSymbol(SymbolAssignment *Cmd, bool InSec);
216321369Sdim  void setDot(Expr E, const Twine &Loc, bool InSec);
217303239Sdim
218321369Sdim  std::vector<InputSection *>
219327952Sdim  computeInputSections(const InputSectionDescription *,
220327952Sdim                       const llvm::DenseMap<SectionBase *, int> &Order);
221321369Sdim
222327952Sdim  std::vector<InputSection *>
223327952Sdim  createInputSectionList(OutputSection &Cmd,
224327952Sdim                         const llvm::DenseMap<SectionBase *, int> &Order);
225321369Sdim
226327952Sdim  std::vector<size_t> getPhdrIndices(OutputSection *Sec);
227321369Sdim
228327952Sdim  MemoryRegion *findMemoryRegion(OutputSection *Sec);
229321369Sdim
230321369Sdim  void switchTo(OutputSection *Sec);
231321369Sdim  uint64_t advance(uint64_t Size, unsigned Align);
232321369Sdim  void output(InputSection *Sec);
233321369Sdim
234327952Sdim  void assignOffsets(OutputSection *Sec);
235327952Sdim
236327952Sdim  // Ctx captures the local AddressState and makes it accessible
237327952Sdim  // deliberately. This is needed as there are some cases where we cannot just
238327952Sdim  // thread the current state through to a lambda function created by the
239327952Sdim  // script parser.
240327952Sdim  // This should remain a plain pointer as its lifetime is smaller than
241327952Sdim  // LinkerScript.
242327952Sdim  AddressState *Ctx = nullptr;
243327952Sdim
244321369Sdim  OutputSection *Aether;
245321369Sdim
246321369Sdim  uint64_t Dot;
247321369Sdim
248303239Sdimpublic:
249327952Sdim  OutputSection *createOutputSection(StringRef Name, StringRef Location);
250327952Sdim  OutputSection *getOrCreateOutputSection(StringRef Name);
251314564Sdim
252327952Sdim  bool hasPhdrsCommands() { return !PhdrsCommands.empty(); }
253321369Sdim  uint64_t getDot() { return Dot; }
254327952Sdim  void discard(ArrayRef<InputSection *> V);
255321369Sdim
256327952Sdim  ExprValue getSymbolValue(StringRef Name, const Twine &Loc);
257321369Sdim
258327952Sdim  void addOrphanSections();
259314564Sdim  void removeEmptyCommands();
260314564Sdim  void adjustSectionsBeforeSorting();
261314564Sdim  void adjustSectionsAfterSorting();
262314564Sdim
263327952Sdim  std::vector<PhdrEntry *> createPhdrs();
264327952Sdim  bool needsInterpSection();
265314564Sdim
266321369Sdim  bool shouldKeep(InputSectionBase *S);
267321369Sdim  void assignAddresses();
268327952Sdim  void allocateHeaders(std::vector<PhdrEntry *> &Phdrs);
269327952Sdim  void processSectionCommands();
270303239Sdim
271327952Sdim  // SECTIONS command list.
272327952Sdim  std::vector<BaseCommand *> SectionCommands;
273327952Sdim
274327952Sdim  // PHDRS command list.
275327952Sdim  std::vector<PhdrsCommand> PhdrsCommands;
276327952Sdim
277327952Sdim  bool HasSectionsCommand = false;
278327952Sdim  bool ErrorOnMissingSection = false;
279327952Sdim
280327952Sdim  // List of section patterns specified with KEEP commands. They will
281327952Sdim  // be kept even if they are unused and --gc-sections is specified.
282327952Sdim  std::vector<InputSectionDescription *> KeptSections;
283327952Sdim
284327952Sdim  // A map from memory region name to a memory region descriptor.
285327952Sdim  llvm::MapVector<llvm::StringRef, MemoryRegion *> MemoryRegions;
286327952Sdim
287327952Sdim  // A list of symbols referenced by the script.
288327952Sdim  std::vector<llvm::StringRef> ReferencedSymbols;
289303239Sdim};
290303239Sdim
// Declaration of the global LinkerScript pointer. This header only declares
// it; the definition lives in another translation unit.
extern LinkerScript *Script;
292303239Sdim
293314564Sdim} // end namespace elf
294314564Sdim} // end namespace lld
295314564Sdim
296314564Sdim#endif // LLD_ELF_LINKER_SCRIPT_H
297