//===- SyntheticSections.cpp ---------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "SyntheticSections.h" #include "Config.h" #include "ExportTrie.h" #include "InputFiles.h" #include "MachOStructs.h" #include "MergedOutputSection.h" #include "OutputSegment.h" #include "SymbolTable.h" #include "Symbols.h" #include "Writer.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/LEB128.h" using namespace llvm; using namespace llvm::support; using namespace llvm::support::endian; using namespace lld; using namespace lld::macho; InStruct macho::in; std::vector macho::syntheticSections; SyntheticSection::SyntheticSection(const char *segname, const char *name) : OutputSection(SyntheticKind, name), segname(segname) { syntheticSections.push_back(this); } // dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts // from the beginning of the file (i.e. the header). MachHeaderSection::MachHeaderSection() : SyntheticSection(segment_names::text, section_names::header) {} void MachHeaderSection::addLoadCommand(LoadCommand *lc) { loadCommands.push_back(lc); sizeOfCmds += lc->getSize(); } uint64_t MachHeaderSection::getSize() const { return sizeof(MachO::mach_header_64) + sizeOfCmds; } void MachHeaderSection::writeTo(uint8_t *buf) const { auto *hdr = reinterpret_cast(buf); hdr->magic = MachO::MH_MAGIC_64; hdr->cputype = MachO::CPU_TYPE_X86_64; hdr->cpusubtype = MachO::CPU_SUBTYPE_X86_64_ALL | MachO::CPU_SUBTYPE_LIB64; hdr->filetype = config->outputType; hdr->ncmds = loadCommands.size(); hdr->sizeofcmds = sizeOfCmds; hdr->flags = MachO::MH_NOUNDEFS | MachO::MH_DYLDLINK | MachO::MH_TWOLEVEL; if (config->outputType == MachO::MH_DYLIB && !config->hasReexports) hdr->flags |= MachO::MH_NO_REEXPORTED_DYLIBS; uint8_t *p = reinterpret_cast(hdr + 1); for (LoadCommand *lc : loadCommands) { lc->writeTo(p); p += lc->getSize(); } } PageZeroSection::PageZeroSection() : SyntheticSection(segment_names::pageZero, section_names::pageZero) {} GotSection::GotSection() : SyntheticSection(segment_names::dataConst, section_names::got) { align = 8; flags = MachO::S_NON_LAZY_SYMBOL_POINTERS; // TODO: section_64::reserved1 should be an index into the indirect symbol // table, which we do not currently emit } void GotSection::addEntry(Symbol &sym) { if (entries.insert(&sym)) { sym.gotIndex = entries.size() - 1; } } void GotSection::writeTo(uint8_t *buf) const { for (size_t i = 0, n = entries.size(); i < n; ++i) if (auto *defined = dyn_cast(entries[i])) write64le(&buf[i * WordSize], defined->getVA()); } BindingSection::BindingSection() : SyntheticSection(segment_names::linkEdit, section_names::binding) {} bool BindingSection::isNeeded() const { return bindings.size() != 0 || in.got->isNeeded(); } namespace { struct Binding { OutputSegment *segment = nullptr; uint64_t offset = 0; int64_t addend = 0; uint8_t ordinal = 0; }; } // namespace // Encode a sequence of opcodes that tell dyld to write the address of dysym + // addend at osec->addr + outSecOff. // // The bind opcode "interpreter" remembers the values of each binding field, so // we only need to encode the differences between bindings. Hence the use of // lastBinding. static void encodeBinding(const DylibSymbol &dysym, const OutputSection *osec, uint64_t outSecOff, int64_t addend, Binding &lastBinding, raw_svector_ostream &os) { using namespace llvm::MachO; OutputSegment *seg = osec->parent; uint64_t offset = osec->getSegmentOffset() + outSecOff; if (lastBinding.segment != seg) { os << static_cast(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | seg->index); encodeULEB128(offset, os); lastBinding.segment = seg; lastBinding.offset = offset; } else if (lastBinding.offset != offset) { assert(lastBinding.offset <= offset); os << static_cast(BIND_OPCODE_ADD_ADDR_ULEB); encodeULEB128(offset - lastBinding.offset, os); lastBinding.offset = offset; } if (lastBinding.ordinal != dysym.file->ordinal) { if (dysym.file->ordinal <= BIND_IMMEDIATE_MASK) { os << static_cast(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | dysym.file->ordinal); } else { error("TODO: Support larger dylib symbol ordinals"); return; } lastBinding.ordinal = dysym.file->ordinal; } if (lastBinding.addend != addend) { os << static_cast(BIND_OPCODE_SET_ADDEND_SLEB); encodeSLEB128(addend, os); lastBinding.addend = addend; } os << static_cast(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) << dysym.getName() << '\0' << static_cast(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER) << static_cast(BIND_OPCODE_DO_BIND); // DO_BIND causes dyld to both perform the binding and increment the offset lastBinding.offset += WordSize; } // Emit bind opcodes, which are a stream of byte-sized opcodes that dyld // interprets to update a record with the following fields: // * segment index (of the segment to write the symbol addresses to, typically // the __DATA_CONST segment which contains the GOT) // * offset within the segment, indicating the next location to write a binding // * symbol type // * symbol library ordinal (the index of its library's LC_LOAD_DYLIB command) // * symbol name // * addend // When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind // a symbol in the GOT, and increments the segment offset to point to the next // entry. It does *not* clear the record state after doing the bind, so // subsequent opcodes only need to encode the differences between bindings. void BindingSection::finalizeContents() { raw_svector_ostream os{contents}; Binding lastBinding; bool didEncode = false; size_t gotIdx = 0; for (const Symbol *sym : in.got->getEntries()) { if (const auto *dysym = dyn_cast(sym)) { didEncode = true; encodeBinding(*dysym, in.got, gotIdx * WordSize, 0, lastBinding, os); } ++gotIdx; } // Sorting the relocations by segment and address allows us to encode them // more compactly. llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) { OutputSegment *segA = a.isec->parent->parent; OutputSegment *segB = b.isec->parent->parent; if (segA != segB) return segA->fileOff < segB->fileOff; OutputSection *osecA = a.isec->parent; OutputSection *osecB = b.isec->parent; if (osecA != osecB) return osecA->addr < osecB->addr; if (a.isec != b.isec) return a.isec->outSecOff < b.isec->outSecOff; return a.offset < b.offset; }); for (const BindingEntry &b : bindings) { didEncode = true; encodeBinding(*b.dysym, b.isec->parent, b.isec->outSecOff + b.offset, b.addend, lastBinding, os); } if (didEncode) os << static_cast(MachO::BIND_OPCODE_DONE); } void BindingSection::writeTo(uint8_t *buf) const { memcpy(buf, contents.data(), contents.size()); } StubsSection::StubsSection() : SyntheticSection(segment_names::text, "__stubs") {} uint64_t StubsSection::getSize() const { return entries.size() * target->stubSize; } void StubsSection::writeTo(uint8_t *buf) const { size_t off = 0; for (const DylibSymbol *sym : in.stubs->getEntries()) { target->writeStub(buf + off, *sym); off += target->stubSize; } } void StubsSection::addEntry(DylibSymbol &sym) { if (entries.insert(&sym)) sym.stubsIndex = entries.size() - 1; } StubHelperSection::StubHelperSection() : SyntheticSection(segment_names::text, "__stub_helper") {} uint64_t StubHelperSection::getSize() const { return target->stubHelperHeaderSize + in.stubs->getEntries().size() * target->stubHelperEntrySize; } bool StubHelperSection::isNeeded() const { return !in.stubs->getEntries().empty(); } void StubHelperSection::writeTo(uint8_t *buf) const { target->writeStubHelperHeader(buf); size_t off = target->stubHelperHeaderSize; for (const DylibSymbol *sym : in.stubs->getEntries()) { target->writeStubHelperEntry(buf + off, *sym, addr + off); off += target->stubHelperEntrySize; } } void StubHelperSection::setup() { stubBinder = dyn_cast_or_null(symtab->find("dyld_stub_binder")); if (stubBinder == nullptr) { error("symbol dyld_stub_binder not found (normally in libSystem.dylib). " "Needed to perform lazy binding."); return; } in.got->addEntry(*stubBinder); inputSections.push_back(in.imageLoaderCache); symtab->addDefined("__dyld_private", in.imageLoaderCache, 0); } ImageLoaderCacheSection::ImageLoaderCacheSection() { segname = segment_names::data; name = "__data"; uint8_t *arr = bAlloc.Allocate(WordSize); memset(arr, 0, WordSize); data = {arr, WordSize}; } LazyPointerSection::LazyPointerSection() : SyntheticSection(segment_names::data, "__la_symbol_ptr") { align = 8; flags = MachO::S_LAZY_SYMBOL_POINTERS; } uint64_t LazyPointerSection::getSize() const { return in.stubs->getEntries().size() * WordSize; } bool LazyPointerSection::isNeeded() const { return !in.stubs->getEntries().empty(); } void LazyPointerSection::writeTo(uint8_t *buf) const { size_t off = 0; for (const DylibSymbol *sym : in.stubs->getEntries()) { uint64_t stubHelperOffset = target->stubHelperHeaderSize + sym->stubsIndex * target->stubHelperEntrySize; write64le(buf + off, in.stubHelper->addr + stubHelperOffset); off += WordSize; } } LazyBindingSection::LazyBindingSection() : SyntheticSection(segment_names::linkEdit, section_names::lazyBinding) {} bool LazyBindingSection::isNeeded() const { return in.stubs->isNeeded(); } void LazyBindingSection::finalizeContents() { // TODO: Just precompute output size here instead of writing to a temporary // buffer for (DylibSymbol *sym : in.stubs->getEntries()) sym->lazyBindOffset = encode(*sym); } void LazyBindingSection::writeTo(uint8_t *buf) const { memcpy(buf, contents.data(), contents.size()); } // Unlike the non-lazy binding section, the bind opcodes in this section aren't // interpreted all at once. Rather, dyld will start interpreting opcodes at a // given offset, typically only binding a single symbol before it finds a // BIND_OPCODE_DONE terminator. As such, unlike in the non-lazy-binding case, // we cannot encode just the differences between symbols; we have to emit the // complete bind information for each symbol. uint32_t LazyBindingSection::encode(const DylibSymbol &sym) { uint32_t opstreamOffset = contents.size(); OutputSegment *dataSeg = in.lazyPointers->parent; os << static_cast(MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | dataSeg->index); uint64_t offset = in.lazyPointers->addr - dataSeg->firstSection()->addr + sym.stubsIndex * WordSize; encodeULEB128(offset, os); if (sym.file->ordinal <= MachO::BIND_IMMEDIATE_MASK) os << static_cast(MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | sym.file->ordinal); else fatal("TODO: Support larger dylib symbol ordinals"); os << static_cast(MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) << sym.getName() << '\0' << static_cast(MachO::BIND_OPCODE_DO_BIND) << static_cast(MachO::BIND_OPCODE_DONE); return opstreamOffset; } ExportSection::ExportSection() : SyntheticSection(segment_names::linkEdit, section_names::export_) {} void ExportSection::finalizeContents() { // TODO: We should check symbol visibility. for (const Symbol *sym : symtab->getSymbols()) if (auto *defined = dyn_cast(sym)) trieBuilder.addSymbol(*defined); size = trieBuilder.build(); } void ExportSection::writeTo(uint8_t *buf) const { trieBuilder.writeTo(buf); } SymtabSection::SymtabSection(StringTableSection &stringTableSection) : SyntheticSection(segment_names::linkEdit, section_names::symbolTable), stringTableSection(stringTableSection) { // TODO: When we introduce the SyntheticSections superclass, we should make // all synthetic sections aligned to WordSize by default. align = WordSize; } uint64_t SymtabSection::getSize() const { return symbols.size() * sizeof(structs::nlist_64); } void SymtabSection::finalizeContents() { // TODO support other symbol types for (Symbol *sym : symtab->getSymbols()) if (isa(sym)) symbols.push_back({sym, stringTableSection.addString(sym->getName())}); } void SymtabSection::writeTo(uint8_t *buf) const { auto *nList = reinterpret_cast(buf); for (const SymtabEntry &entry : symbols) { nList->n_strx = entry.strx; // TODO support other symbol types // TODO populate n_desc if (auto *defined = dyn_cast(entry.sym)) { nList->n_type = MachO::N_EXT | MachO::N_SECT; nList->n_sect = defined->isec->parent->index; // For the N_SECT symbol type, n_value is the address of the symbol nList->n_value = defined->value + defined->isec->getVA(); } ++nList; } } StringTableSection::StringTableSection() : SyntheticSection(segment_names::linkEdit, section_names::stringTable) {} uint32_t StringTableSection::addString(StringRef str) { uint32_t strx = size; strings.push_back(str); size += str.size() + 1; // account for null terminator return strx; } void StringTableSection::writeTo(uint8_t *buf) const { uint32_t off = 0; for (StringRef str : strings) { memcpy(buf + off, str.data(), str.size()); off += str.size() + 1; // account for null terminator } }