LinkerScript.h revision 341825
1//===- LinkerScript.h -------------------------------------------*- C++ -*-===// 2// 3// The LLVM Linker 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#ifndef LLD_ELF_LINKER_SCRIPT_H 11#define LLD_ELF_LINKER_SCRIPT_H 12 13#include "Config.h" 14#include "Writer.h" 15#include "lld/Common/LLVM.h" 16#include "lld/Common/Strings.h" 17#include "llvm/ADT/ArrayRef.h" 18#include "llvm/ADT/DenseMap.h" 19#include "llvm/ADT/DenseSet.h" 20#include "llvm/ADT/MapVector.h" 21#include "llvm/ADT/StringRef.h" 22#include "llvm/Support/MemoryBuffer.h" 23#include <cstddef> 24#include <cstdint> 25#include <functional> 26#include <memory> 27#include <vector> 28 29namespace lld { 30namespace elf { 31 32class Defined; 33class Symbol; 34class InputSectionBase; 35class InputSection; 36class OutputSection; 37class InputSectionBase; 38class SectionBase; 39 40// This represents an r-value in the linker script. 41struct ExprValue { 42 ExprValue(SectionBase *Sec, bool ForceAbsolute, uint64_t Val, 43 const Twine &Loc) 44 : Sec(Sec), ForceAbsolute(ForceAbsolute), Val(Val), Loc(Loc.str()) {} 45 46 ExprValue(uint64_t Val) : ExprValue(nullptr, false, Val, "") {} 47 48 bool isAbsolute() const { return ForceAbsolute || Sec == nullptr; } 49 uint64_t getValue() const; 50 uint64_t getSecAddr() const; 51 uint64_t getSectionOffset() const; 52 53 // If a value is relative to a section, it has a non-null Sec. 54 SectionBase *Sec; 55 56 // True if this expression is enclosed in ABSOLUTE(). 57 // This flag affects the return value of getValue(). 58 bool ForceAbsolute; 59 60 uint64_t Val; 61 uint64_t Alignment = 1; 62 63 // Original source location. Used for error messages. 64 std::string Loc; 65}; 66 67// This represents an expression in the linker script. 68// ScriptParser::readExpr reads an expression and returns an Expr. 69// Later, we evaluate the expression by calling the function. 70typedef std::function<ExprValue()> Expr; 71 72// This enum is used to implement linker script SECTIONS command. 73// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS 74enum SectionsCommandKind { 75 AssignmentKind, // . = expr or <sym> = expr 76 OutputSectionKind, 77 InputSectionKind, 78 ByteKind // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr) 79}; 80 81struct BaseCommand { 82 BaseCommand(int K) : Kind(K) {} 83 int Kind; 84}; 85 86// This represents ". = <expr>" or "<symbol> = <expr>". 87struct SymbolAssignment : BaseCommand { 88 SymbolAssignment(StringRef Name, Expr E, std::string Loc) 89 : BaseCommand(AssignmentKind), Name(Name), Expression(E), Location(Loc) {} 90 91 static bool classof(const BaseCommand *C) { 92 return C->Kind == AssignmentKind; 93 } 94 95 // The LHS of an expression. Name is either a symbol name or ".". 96 StringRef Name; 97 Defined *Sym = nullptr; 98 99 // The RHS of an expression. 100 Expr Expression; 101 102 // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN. 103 bool Provide = false; 104 bool Hidden = false; 105 106 // Holds file name and line number for error reporting. 107 std::string Location; 108 109 // A string representation of this command. We use this for -Map. 110 std::string CommandString; 111 112 // Address of this assignment command. 113 unsigned Addr; 114 115 // Size of this assignment command. This is usually 0, but if 116 // you move '.' this may be greater than 0. 117 unsigned Size; 118}; 119 120// Linker scripts allow additional constraints to be put on ouput sections. 121// If an output section is marked as ONLY_IF_RO, the section is created 122// only if its input sections are read-only. Likewise, an output section 123// with ONLY_IF_RW is created if all input sections are RW. 124enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite }; 125 126// This struct is used to represent the location and size of regions of 127// target memory. Instances of the struct are created by parsing the 128// MEMORY command. 129struct MemoryRegion { 130 MemoryRegion(StringRef Name, uint64_t Origin, uint64_t Length, uint32_t Flags, 131 uint32_t NegFlags) 132 : Name(Name), Origin(Origin), Length(Length), Flags(Flags), 133 NegFlags(NegFlags) {} 134 135 std::string Name; 136 uint64_t Origin; 137 uint64_t Length; 138 uint32_t Flags; 139 uint32_t NegFlags; 140 uint64_t CurPos = 0; 141}; 142 143// This struct represents one section match pattern in SECTIONS() command. 144// It can optionally have negative match pattern for EXCLUDED_FILE command. 145// Also it may be surrounded with SORT() command, so contains sorting rules. 146struct SectionPattern { 147 SectionPattern(StringMatcher &&Pat1, StringMatcher &&Pat2) 148 : ExcludedFilePat(Pat1), SectionPat(Pat2) {} 149 150 StringMatcher ExcludedFilePat; 151 StringMatcher SectionPat; 152 SortSectionPolicy SortOuter; 153 SortSectionPolicy SortInner; 154}; 155 156class ThunkSection; 157struct InputSectionDescription : BaseCommand { 158 InputSectionDescription(StringRef FilePattern) 159 : BaseCommand(InputSectionKind), FilePat(FilePattern) {} 160 161 static bool classof(const BaseCommand *C) { 162 return C->Kind == InputSectionKind; 163 } 164 165 StringMatcher FilePat; 166 167 // Input sections that matches at least one of SectionPatterns 168 // will be associated with this InputSectionDescription. 169 std::vector<SectionPattern> SectionPatterns; 170 171 std::vector<InputSection *> Sections; 172 173 // Temporary record of synthetic ThunkSection instances and the pass that 174 // they were created in. This is used to insert newly created ThunkSections 175 // into Sections at the end of a createThunks() pass. 176 std::vector<std::pair<ThunkSection *, uint32_t>> ThunkSections; 177}; 178 179// Represents BYTE(), SHORT(), LONG(), or QUAD(). 180struct ByteCommand : BaseCommand { 181 ByteCommand(Expr E, unsigned Size, std::string CommandString) 182 : BaseCommand(ByteKind), CommandString(CommandString), Expression(E), 183 Size(Size) {} 184 185 static bool classof(const BaseCommand *C) { return C->Kind == ByteKind; } 186 187 // Keeps string representing the command. Used for -Map" is perhaps better. 188 std::string CommandString; 189 190 Expr Expression; 191 192 // This is just an offset of this assignment command in the output section. 193 unsigned Offset; 194 195 // Size of this data command. 196 unsigned Size; 197}; 198 199struct PhdrsCommand { 200 StringRef Name; 201 unsigned Type = llvm::ELF::PT_NULL; 202 bool HasFilehdr = false; 203 bool HasPhdrs = false; 204 llvm::Optional<unsigned> Flags; 205 Expr LMAExpr = nullptr; 206}; 207 208class LinkerScript final { 209 // Temporary state used in processSectionCommands() and assignAddresses() 210 // that must be reinitialized for each call to the above functions, and must 211 // not be used outside of the scope of a call to the above functions. 212 struct AddressState { 213 AddressState(); 214 uint64_t ThreadBssOffset = 0; 215 OutputSection *OutSec = nullptr; 216 MemoryRegion *MemRegion = nullptr; 217 MemoryRegion *LMARegion = nullptr; 218 uint64_t LMAOffset = 0; 219 }; 220 221 llvm::DenseMap<StringRef, OutputSection *> NameToOutputSection; 222 223 void addSymbol(SymbolAssignment *Cmd); 224 void assignSymbol(SymbolAssignment *Cmd, bool InSec); 225 void setDot(Expr E, const Twine &Loc, bool InSec); 226 void expandOutputSection(uint64_t Size); 227 void expandMemoryRegions(uint64_t Size); 228 229 std::vector<InputSection *> 230 computeInputSections(const InputSectionDescription *); 231 232 std::vector<InputSection *> createInputSectionList(OutputSection &Cmd); 233 234 std::vector<size_t> getPhdrIndices(OutputSection *Sec); 235 236 MemoryRegion *findMemoryRegion(OutputSection *Sec); 237 238 void switchTo(OutputSection *Sec); 239 uint64_t advance(uint64_t Size, unsigned Align); 240 void output(InputSection *Sec); 241 242 void assignOffsets(OutputSection *Sec); 243 244 // Ctx captures the local AddressState and makes it accessible 245 // deliberately. This is needed as there are some cases where we cannot just 246 // thread the current state through to a lambda function created by the 247 // script parser. 248 // This should remain a plain pointer as its lifetime is smaller than 249 // LinkerScript. 250 AddressState *Ctx = nullptr; 251 252 OutputSection *Aether; 253 254 uint64_t Dot; 255 256public: 257 OutputSection *createOutputSection(StringRef Name, StringRef Location); 258 OutputSection *getOrCreateOutputSection(StringRef Name); 259 260 bool hasPhdrsCommands() { return !PhdrsCommands.empty(); } 261 uint64_t getDot() { return Dot; } 262 void discard(ArrayRef<InputSection *> V); 263 264 ExprValue getSymbolValue(StringRef Name, const Twine &Loc); 265 266 void addOrphanSections(); 267 void adjustSectionsBeforeSorting(); 268 void adjustSectionsAfterSorting(); 269 270 std::vector<PhdrEntry *> createPhdrs(); 271 bool needsInterpSection(); 272 273 bool shouldKeep(InputSectionBase *S); 274 void assignAddresses(); 275 void allocateHeaders(std::vector<PhdrEntry *> &Phdrs); 276 void processSectionCommands(); 277 void declareSymbols(); 278 279 // Used to handle INSERT AFTER statements. 280 void processInsertCommands(); 281 282 // SECTIONS command list. 283 std::vector<BaseCommand *> SectionCommands; 284 285 // PHDRS command list. 286 std::vector<PhdrsCommand> PhdrsCommands; 287 288 bool HasSectionsCommand = false; 289 bool ErrorOnMissingSection = false; 290 291 // List of section patterns specified with KEEP commands. They will 292 // be kept even if they are unused and --gc-sections is specified. 293 std::vector<InputSectionDescription *> KeptSections; 294 295 // A map from memory region name to a memory region descriptor. 296 llvm::MapVector<llvm::StringRef, MemoryRegion *> MemoryRegions; 297 298 // A list of symbols referenced by the script. 299 std::vector<llvm::StringRef> ReferencedSymbols; 300 301 // Used to implement INSERT [AFTER|BEFORE]. Contains commands that need 302 // to be inserted into SECTIONS commands list. 303 llvm::DenseMap<StringRef, std::vector<BaseCommand *>> InsertAfterCommands; 304 llvm::DenseMap<StringRef, std::vector<BaseCommand *>> InsertBeforeCommands; 305}; 306 307extern LinkerScript *Script; 308 309} // end namespace elf 310} // end namespace lld 311 312#endif // LLD_ELF_LINKER_SCRIPT_H 313