1303239Sdim//===- LinkerScript.h -------------------------------------------*- C++ -*-===// 2303239Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6303239Sdim// 7303239Sdim//===----------------------------------------------------------------------===// 8303239Sdim 9303239Sdim#ifndef LLD_ELF_LINKER_SCRIPT_H 10303239Sdim#define LLD_ELF_LINKER_SCRIPT_H 11303239Sdim 12314564Sdim#include "Config.h" 13314564Sdim#include "Writer.h" 14327952Sdim#include "lld/Common/LLVM.h" 15341825Sdim#include "lld/Common/Strings.h" 16314564Sdim#include "llvm/ADT/ArrayRef.h" 17321369Sdim#include "llvm/ADT/DenseMap.h" 18314564Sdim#include "llvm/ADT/DenseSet.h" 19327952Sdim#include "llvm/ADT/MapVector.h" 20314564Sdim#include "llvm/ADT/StringRef.h" 21303239Sdim#include "llvm/Support/MemoryBuffer.h" 22314564Sdim#include <cstddef> 23314564Sdim#include <cstdint> 24314564Sdim#include <functional> 25314564Sdim#include <memory> 26314564Sdim#include <vector> 27303239Sdim 28303239Sdimnamespace lld { 29303239Sdimnamespace elf { 30303239Sdim 31327952Sdimclass Defined; 32344779Sdimclass InputSection; 33321369Sdimclass InputSectionBase; 34321369Sdimclass OutputSection; 35321369Sdimclass SectionBase; 36344779Sdimclass Symbol; 37344779Sdimclass ThunkSection; 38314564Sdim 39327952Sdim// This represents an r-value in the linker script. 40321369Sdimstruct ExprValue { 41353358Sdim ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val, 42353358Sdim const Twine &loc) 43353358Sdim : sec(sec), forceAbsolute(forceAbsolute), val(val), loc(loc.str()) {} 44327952Sdim 45353358Sdim ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {} 46327952Sdim 47353358Sdim bool isAbsolute() const { return forceAbsolute || sec == nullptr; } 48321369Sdim uint64_t getValue() const; 49321369Sdim uint64_t getSecAddr() const; 50327952Sdim uint64_t getSectionOffset() const; 51327952Sdim 52327952Sdim // If a value is relative to a section, it has a non-null Sec. 53353358Sdim SectionBase *sec; 54327952Sdim 55327952Sdim // True if this expression is enclosed in ABSOLUTE(). 56327952Sdim // This flag affects the return value of getValue(). 57353358Sdim bool forceAbsolute; 58327952Sdim 59353358Sdim uint64_t val; 60353358Sdim uint64_t alignment = 1; 61327952Sdim 62327952Sdim // Original source location. Used for error messages. 63353358Sdim std::string loc; 64314564Sdim}; 65314564Sdim 66321369Sdim// This represents an expression in the linker script. 67321369Sdim// ScriptParser::readExpr reads an expression and returns an Expr. 68321369Sdim// Later, we evaluate the expression by calling the function. 69353358Sdimusing Expr = std::function<ExprValue()>; 70303239Sdim 71314564Sdim// This enum is used to implement linker script SECTIONS command. 72314564Sdim// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS 73314564Sdimenum SectionsCommandKind { 74314564Sdim AssignmentKind, // . = expr or <sym> = expr 75314564Sdim OutputSectionKind, 76314564Sdim InputSectionKind, 77327952Sdim ByteKind // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr) 78314564Sdim}; 79303239Sdim 80314564Sdimstruct BaseCommand { 81353358Sdim BaseCommand(int k) : kind(k) {} 82353358Sdim int kind; 83303239Sdim}; 84303239Sdim 85314564Sdim// This represents ". = <expr>" or "<symbol> = <expr>". 86314564Sdimstruct SymbolAssignment : BaseCommand { 87353358Sdim SymbolAssignment(StringRef name, Expr e, std::string loc) 88353358Sdim : BaseCommand(AssignmentKind), name(name), expression(e), location(loc) {} 89303239Sdim 90353358Sdim static bool classof(const BaseCommand *c) { 91353358Sdim return c->kind == AssignmentKind; 92327952Sdim } 93314564Sdim 94314564Sdim // The LHS of an expression. Name is either a symbol name or ".". 95353358Sdim StringRef name; 96353358Sdim Defined *sym = nullptr; 97314564Sdim 98314564Sdim // The RHS of an expression. 99353358Sdim Expr expression; 100314564Sdim 101314564Sdim // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN. 102353358Sdim bool provide = false; 103353358Sdim bool hidden = false; 104321369Sdim 105321369Sdim // Holds file name and line number for error reporting. 106353358Sdim std::string location; 107341825Sdim 108341825Sdim // A string representation of this command. We use this for -Map. 109353358Sdim std::string commandString; 110341825Sdim 111341825Sdim // Address of this assignment command. 112353358Sdim unsigned addr; 113341825Sdim 114341825Sdim // Size of this assignment command. This is usually 0, but if 115341825Sdim // you move '.' this may be greater than 0. 116353358Sdim unsigned size; 117303239Sdim}; 118303239Sdim 119360784Sdim// Linker scripts allow additional constraints to be put on output sections. 120314564Sdim// If an output section is marked as ONLY_IF_RO, the section is created 121314564Sdim// only if its input sections are read-only. Likewise, an output section 122314564Sdim// with ONLY_IF_RW is created if all input sections are RW. 123314564Sdimenum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite }; 124314564Sdim 125321369Sdim// This struct is used to represent the location and size of regions of 126321369Sdim// target memory. Instances of the struct are created by parsing the 127321369Sdim// MEMORY command. 128321369Sdimstruct MemoryRegion { 129353358Sdim MemoryRegion(StringRef name, uint64_t origin, uint64_t length, uint32_t flags, 130353358Sdim uint32_t negFlags) 131353358Sdim : name(name), origin(origin), length(length), flags(flags), 132353358Sdim negFlags(negFlags) {} 133328544Semaste 134353358Sdim std::string name; 135353358Sdim uint64_t origin; 136353358Sdim uint64_t length; 137353358Sdim uint32_t flags; 138353358Sdim uint32_t negFlags; 139353358Sdim uint64_t curPos = 0; 140321369Sdim}; 141321369Sdim 142314564Sdim// This struct represents one section match pattern in SECTIONS() command. 143314564Sdim// It can optionally have negative match pattern for EXCLUDED_FILE command. 144314564Sdim// Also it may be surrounded with SORT() command, so contains sorting rules. 145314564Sdimstruct SectionPattern { 146353358Sdim SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2) 147353358Sdim : excludedFilePat(pat1), sectionPat(pat2), 148353358Sdim sortOuter(SortSectionPolicy::Default), 149353358Sdim sortInner(SortSectionPolicy::Default) {} 150314564Sdim 151353358Sdim StringMatcher excludedFilePat; 152353358Sdim StringMatcher sectionPat; 153353358Sdim SortSectionPolicy sortOuter; 154353358Sdim SortSectionPolicy sortInner; 155314564Sdim}; 156314564Sdim 157314564Sdimstruct InputSectionDescription : BaseCommand { 158353358Sdim InputSectionDescription(StringRef filePattern) 159353358Sdim : BaseCommand(InputSectionKind), filePat(filePattern) {} 160314564Sdim 161353358Sdim static bool classof(const BaseCommand *c) { 162353358Sdim return c->kind == InputSectionKind; 163327952Sdim } 164314564Sdim 165353358Sdim StringMatcher filePat; 166314564Sdim 167314564Sdim // Input sections that matches at least one of SectionPatterns 168314564Sdim // will be associated with this InputSectionDescription. 169353358Sdim std::vector<SectionPattern> sectionPatterns; 170314564Sdim 171360784Sdim // Includes InputSections and MergeInputSections. Used temporarily during 172360784Sdim // assignment of input sections to output sections. 173360784Sdim std::vector<InputSectionBase *> sectionBases; 174360784Sdim 175360784Sdim // Used after the finalizeInputSections() pass. MergeInputSections have been 176360784Sdim // merged into MergeSyntheticSections. 177353358Sdim std::vector<InputSection *> sections; 178327952Sdim 179327952Sdim // Temporary record of synthetic ThunkSection instances and the pass that 180327952Sdim // they were created in. This is used to insert newly created ThunkSections 181327952Sdim // into Sections at the end of a createThunks() pass. 182353358Sdim std::vector<std::pair<ThunkSection *, uint32_t>> thunkSections; 183314564Sdim}; 184314564Sdim 185314564Sdim// Represents BYTE(), SHORT(), LONG(), or QUAD(). 186327952Sdimstruct ByteCommand : BaseCommand { 187353358Sdim ByteCommand(Expr e, unsigned size, std::string commandString) 188353358Sdim : BaseCommand(ByteKind), commandString(commandString), expression(e), 189353358Sdim size(size) {} 190314564Sdim 191353358Sdim static bool classof(const BaseCommand *c) { return c->kind == ByteKind; } 192314564Sdim 193341825Sdim // Keeps string representing the command. Used for -Map" is perhaps better. 194353358Sdim std::string commandString; 195341825Sdim 196353358Sdim Expr expression; 197341825Sdim 198341825Sdim // This is just an offset of this assignment command in the output section. 199353358Sdim unsigned offset; 200341825Sdim 201341825Sdim // Size of this data command. 202353358Sdim unsigned size; 203314564Sdim}; 204314564Sdim 205314564Sdimstruct PhdrsCommand { 206353358Sdim StringRef name; 207353358Sdim unsigned type = llvm::ELF::PT_NULL; 208353358Sdim bool hasFilehdr = false; 209353358Sdim bool hasPhdrs = false; 210353358Sdim llvm::Optional<unsigned> flags; 211353358Sdim Expr lmaExpr = nullptr; 212314564Sdim}; 213314564Sdim 214321369Sdimclass LinkerScript final { 215327952Sdim // Temporary state used in processSectionCommands() and assignAddresses() 216321369Sdim // that must be reinitialized for each call to the above functions, and must 217321369Sdim // not be used outside of the scope of a call to the above functions. 218321369Sdim struct AddressState { 219327952Sdim AddressState(); 220353358Sdim uint64_t threadBssOffset = 0; 221353358Sdim OutputSection *outSec = nullptr; 222353358Sdim MemoryRegion *memRegion = nullptr; 223353358Sdim MemoryRegion *lmaRegion = nullptr; 224353358Sdim uint64_t lmaOffset = 0; 225321369Sdim }; 226303239Sdim 227353358Sdim llvm::DenseMap<StringRef, OutputSection *> nameToOutputSection; 228327952Sdim 229353358Sdim void addSymbol(SymbolAssignment *cmd); 230353358Sdim void assignSymbol(SymbolAssignment *cmd, bool inSec); 231353358Sdim void setDot(Expr e, const Twine &loc, bool inSec); 232353358Sdim void expandOutputSection(uint64_t size); 233353358Sdim void expandMemoryRegions(uint64_t size); 234303239Sdim 235360784Sdim std::vector<InputSectionBase *> 236341825Sdim computeInputSections(const InputSectionDescription *); 237321369Sdim 238360784Sdim std::vector<InputSectionBase *> createInputSectionList(OutputSection &cmd); 239321369Sdim 240353358Sdim std::vector<size_t> getPhdrIndices(OutputSection *sec); 241321369Sdim 242353358Sdim MemoryRegion *findMemoryRegion(OutputSection *sec); 243321369Sdim 244353358Sdim void switchTo(OutputSection *sec); 245353358Sdim uint64_t advance(uint64_t size, unsigned align); 246353358Sdim void output(InputSection *sec); 247321369Sdim 248353358Sdim void assignOffsets(OutputSection *sec); 249327952Sdim 250327952Sdim // Ctx captures the local AddressState and makes it accessible 251327952Sdim // deliberately. This is needed as there are some cases where we cannot just 252327952Sdim // thread the current state through to a lambda function created by the 253327952Sdim // script parser. 254327952Sdim // This should remain a plain pointer as its lifetime is smaller than 255327952Sdim // LinkerScript. 256353358Sdim AddressState *ctx = nullptr; 257327952Sdim 258353358Sdim OutputSection *aether; 259321369Sdim 260353358Sdim uint64_t dot; 261321369Sdim 262303239Sdimpublic: 263353358Sdim OutputSection *createOutputSection(StringRef name, StringRef location); 264353358Sdim OutputSection *getOrCreateOutputSection(StringRef name); 265314564Sdim 266353358Sdim bool hasPhdrsCommands() { return !phdrsCommands.empty(); } 267353358Sdim uint64_t getDot() { return dot; } 268360784Sdim void discard(InputSectionBase *s); 269321369Sdim 270353358Sdim ExprValue getSymbolValue(StringRef name, const Twine &loc); 271321369Sdim 272327952Sdim void addOrphanSections(); 273314564Sdim void adjustSectionsBeforeSorting(); 274314564Sdim void adjustSectionsAfterSorting(); 275314564Sdim 276327952Sdim std::vector<PhdrEntry *> createPhdrs(); 277327952Sdim bool needsInterpSection(); 278314564Sdim 279353358Sdim bool shouldKeep(InputSectionBase *s); 280360784Sdim const Defined *assignAddresses(); 281353358Sdim void allocateHeaders(std::vector<PhdrEntry *> &phdrs); 282327952Sdim void processSectionCommands(); 283360784Sdim void processSymbolAssignments(); 284341825Sdim void declareSymbols(); 285303239Sdim 286341825Sdim // Used to handle INSERT AFTER statements. 287341825Sdim void processInsertCommands(); 288341825Sdim 289327952Sdim // SECTIONS command list. 290353358Sdim std::vector<BaseCommand *> sectionCommands; 291327952Sdim 292327952Sdim // PHDRS command list. 293353358Sdim std::vector<PhdrsCommand> phdrsCommands; 294327952Sdim 295353358Sdim bool hasSectionsCommand = false; 296353358Sdim bool errorOnMissingSection = false; 297327952Sdim 298327952Sdim // List of section patterns specified with KEEP commands. They will 299327952Sdim // be kept even if they are unused and --gc-sections is specified. 300353358Sdim std::vector<InputSectionDescription *> keptSections; 301327952Sdim 302327952Sdim // A map from memory region name to a memory region descriptor. 303353358Sdim llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions; 304327952Sdim 305327952Sdim // A list of symbols referenced by the script. 306353358Sdim std::vector<llvm::StringRef> referencedSymbols; 307341825Sdim 308341825Sdim // Used to implement INSERT [AFTER|BEFORE]. Contains commands that need 309341825Sdim // to be inserted into SECTIONS commands list. 310353358Sdim llvm::DenseMap<StringRef, std::vector<BaseCommand *>> insertAfterCommands; 311353358Sdim llvm::DenseMap<StringRef, std::vector<BaseCommand *>> insertBeforeCommands; 312303239Sdim}; 313303239Sdim 314353358Sdimextern LinkerScript *script; 315303239Sdim 316314564Sdim} // end namespace elf 317314564Sdim} // end namespace lld 318314564Sdim 319314564Sdim#endif // LLD_ELF_LINKER_SCRIPT_H 320