//===- lib/FileFormat/MachO/ArchHandler_x86.cpp ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ArchHandler.h"
#include "Atoms.h"
#include "MachONormalizedFileBinaryUtils.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm::MachO;
using namespace lld::mach_o::normalized;

namespace lld {
namespace mach_o {

using llvm::support::ulittle16_t;
using llvm::support::ulittle32_t;

using llvm::support::little16_t;
using llvm::support::little32_t;

class ArchHandler_x86 : public ArchHandler {
public:
  ArchHandler_x86() = default;
  ~ArchHandler_x86() override = default;

  const Registry::KindStrings *kindStrings() override { return _sKindStrings; }

  Reference::KindArch kindArch() override { return Reference::KindArch::x86; }

  const StubInfo &stubInfo() override { return _sStubInfo; }

  bool isCallSite(const Reference &) override;
  bool isNonCallBranch(const Reference &) override { return false; }

  bool isPointer(const Reference &) override;
  bool isPairedReloc(const normalized::Relocation &) override;

  bool needsCompactUnwind() override { return false; }

  Reference::KindValue imageOffsetKind() override { return invalid; }

  Reference::KindValue imageOffsetKindIndirect() override { return invalid; }

  Reference::KindValue unwindRefToPersonalityFunctionKind() override {
    return invalid;
  }

  Reference::KindValue unwindRefToCIEKind() override { return negDelta32; }

  Reference::KindValue unwindRefToFunctionKind() override { return delta32; }

  Reference::KindValue lazyImmediateLocationKind() override {
    return lazyImmediateLocation;
  }

  Reference::KindValue unwindRefToEhFrameKind() override { return invalid; }

  Reference::KindValue pointerKind() override { return invalid; }

  uint32_t dwarfCompactUnwindType() override { return 0x04000000U; }

  llvm::Error getReferenceInfo(const normalized::Relocation &reloc,
                               const DefinedAtom *inAtom, uint32_t offsetInAtom,
                               uint64_t fixupAddress, bool swap,
                               FindAtomBySectionAndAddress atomFromAddress,
                               FindAtomBySymbolIndex atomFromSymbolIndex,
                               Reference::KindValue *kind,
                               const lld::Atom **target,
                               Reference::Addend *addend) override;

  llvm::Error
  getPairReferenceInfo(const normalized::Relocation &reloc1,
                       const normalized::Relocation &reloc2,
                       const DefinedAtom *inAtom, uint32_t offsetInAtom,
                       uint64_t fixupAddress, bool swap, bool scatterable,
                       FindAtomBySectionAndAddress atomFromAddress,
                       FindAtomBySymbolIndex atomFromSymbolIndex,
                       Reference::KindValue *kind, const lld::Atom **target,
                       Reference::Addend *addend) override;

  void generateAtomContent(const DefinedAtom &atom, bool relocatable,
                           FindAddressForAtom findAddress,
                           FindAddressForAtom findSectionAddress,
                           uint64_t imageBaseAddress,
                           llvm::MutableArrayRef<uint8_t> atomContentBuffer)
      override;

  void appendSectionRelocations(const DefinedAtom &atom,
                                uint64_t atomSectionOffset,
                                const Reference &ref,
                                FindSymbolIndexForAtom symbolIndexForAtom,
                                FindSectionIndexForAtom sectionIndexForAtom,
                                FindAddressForAtom addressForAtom,
                                normalized::Relocations &relocs) override;

  bool isDataInCodeTransition(Reference::KindValue refKind) override {
    return refKind == modeCode || refKind == modeData;
  }

  Reference::KindValue
  dataInCodeTransitionStart(const MachODefinedAtom &atom) override {
    return modeData;
  }
  Reference::KindValue
  dataInCodeTransitionEnd(const MachODefinedAtom &atom) override {
    return modeCode;
  }

private:
  static const Registry::KindStrings _sKindStrings[];
  static const StubInfo _sStubInfo;

  enum X86Kind : Reference::KindValue {
    invalid,               /// for error condition

    modeCode,              /// Content starting at this offset is code.
    modeData,              /// Content starting at this offset is data.

    // Kinds found in mach-o .o files:
    branch32,              /// ex: call _foo
    branch16,              /// ex: callw _foo
    abs32,                 /// ex: movl _foo, %eax
    funcRel32,             /// ex: movl _foo-L1(%eax), %eax
    pointer32,             /// ex: .long _foo
    delta32,               /// ex: .long _foo - .
    negDelta32,            /// ex: .long . - _foo

    // Kinds introduced by Passes:
    lazyPointer,           /// Location contains a lazy pointer.
    lazyImmediateLocation, /// Location contains immediate value used in stub.
  };

  static bool useExternalRelocationTo(const Atom &target);

  void applyFixupFinal(const Reference &ref, uint8_t *location,
                       uint64_t fixupAddress, uint64_t targetAddress,
                       uint64_t inAtomAddress);

  void applyFixupRelocatable(const Reference &ref, uint8_t *location,
                             uint64_t fixupAddress, uint64_t targetAddress,
                             uint64_t inAtomAddress);
};

//===----------------------------------------------------------------------===//
// ArchHandler_x86
//===----------------------------------------------------------------------===//

const Registry::KindStrings ArchHandler_x86::_sKindStrings[] = {
  LLD_KIND_STRING_ENTRY(invalid),
  LLD_KIND_STRING_ENTRY(modeCode),
  LLD_KIND_STRING_ENTRY(modeData),
  LLD_KIND_STRING_ENTRY(branch32),
  LLD_KIND_STRING_ENTRY(branch16),
  LLD_KIND_STRING_ENTRY(abs32),
  LLD_KIND_STRING_ENTRY(funcRel32),
  LLD_KIND_STRING_ENTRY(pointer32),
  LLD_KIND_STRING_ENTRY(delta32),
  LLD_KIND_STRING_ENTRY(negDelta32),
  LLD_KIND_STRING_ENTRY(lazyPointer),
  LLD_KIND_STRING_ENTRY(lazyImmediateLocation),
  LLD_KIND_STRING_END
};

const ArchHandler::StubInfo ArchHandler_x86::_sStubInfo = {
  "dyld_stub_binder",

  // Lazy pointer references
  { Reference::KindArch::x86, pointer32, 0, 0 },
  { Reference::KindArch::x86, lazyPointer, 0, 0 },

  // GOT pointer to dyld_stub_binder
  { Reference::KindArch::x86, pointer32, 0, 0 },

  // x86 code alignment
  1,

  // Stub size and code
  6,
  { 0xff, 0x25, 0x00, 0x00, 0x00, 0x00 },       // jmp *lazyPointer
  { Reference::KindArch::x86, abs32, 2, 0 },
  { false, 0, 0, 0 },

  // Stub Helper size and code
  10,
  { 0x68, 0x00, 0x00, 0x00, 0x00,               // pushl $lazy-info-offset
    0xE9, 0x00, 0x00, 0x00, 0x00 },             // jmp helper-common
  { Reference::KindArch::x86, lazyImmediateLocation, 1, 0 },
  { Reference::KindArch::x86, branch32, 6, 0 },

  // Stub helper image cache content type
  DefinedAtom::typeNonLazyPointer,

  // Stub Helper-Common size and code
  12,
  // Stub helper alignment
  2,
  { 0x68, 0x00, 0x00, 0x00, 0x00,               // pushl $dyld_ImageLoaderCache
    0xFF, 0x25, 0x00, 0x00, 0x00, 0x00,         // jmp *_fast_lazy_bind
    0x90 },                                     // nop
  { Reference::KindArch::x86, abs32, 1, 0 },
  { false, 0, 0, 0 },
  { Reference::KindArch::x86, abs32, 7, 0 },
  { false, 0, 0, 0 }
};
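
// For orientation, a rough sketch of the lazy-binding control flow the tables
// above encode (offsets refer to the fixup records listed with each code
// sequence):
//
//   stub:          jmp   *lazyPointer           // 0xff 0x25; abs32 at offset 2
//   stub helper:   pushl $lazy-info-offset      // 0x68; immediate at offset 1
//                  jmp   helper-common          // 0xe9; branch32 at offset 6
//   helper common: pushl $dyld_ImageLoaderCache // abs32 at offset 1
//                  jmp   *dyld_stub_binder      // via the GOT slot; offset 7
//
// On the first call the lazy pointer targets the stub helper, which enters
// dyld_stub_binder; dyld then rewrites the lazy pointer to the real target so
// later calls jump there directly.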

bool ArchHandler_x86::isCallSite(const Reference &ref) {
  return (ref.kindValue() == branch32);
}

bool ArchHandler_x86::isPointer(const Reference &ref) {
  return (ref.kindValue() == pointer32);
}

bool ArchHandler_x86::isPairedReloc(const Relocation &reloc) {
  if (!reloc.scattered)
    return false;
  return (reloc.type == GENERIC_RELOC_LOCAL_SECTDIFF) ||
         (reloc.type == GENERIC_RELOC_SECTDIFF);
}

llvm::Error
ArchHandler_x86::getReferenceInfo(const Relocation &reloc,
                                  const DefinedAtom *inAtom,
                                  uint32_t offsetInAtom, uint64_t fixupAddress,
                                  bool swap,
                                  FindAtomBySectionAndAddress atomFromAddress,
                                  FindAtomBySymbolIndex atomFromSymbolIndex,
                                  Reference::KindValue *kind,
                                  const lld::Atom **target,
                                  Reference::Addend *addend) {
  DefinedAtom::ContentPermissions perms;
  const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom];
  uint64_t targetAddress;
  switch (relocPattern(reloc)) {
  case GENERIC_RELOC_VANILLA | rPcRel | rExtern | rLength4:
    // ex: call _foo (and _foo undefined)
    *kind = branch32;
    if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
      return ec;
    *addend = fixupAddress + 4 + (int32_t)*(const little32_t *)fixupContent;
    break;
  case GENERIC_RELOC_VANILLA | rPcRel | rLength4:
    // ex: call _foo (and _foo defined)
    *kind = branch32;
    targetAddress =
        fixupAddress + 4 + (int32_t)*(const little32_t *)fixupContent;
    return atomFromAddress(reloc.symbol, targetAddress, target, addend);
  case GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength4:
    // ex: call _foo+n (and _foo defined)
    *kind = branch32;
    targetAddress =
        fixupAddress + 4 + (int32_t)*(const little32_t *)fixupContent;
    if (auto ec = atomFromAddress(0, reloc.value, target, addend))
      return ec;
    *addend = targetAddress - reloc.value;
    break;
  case GENERIC_RELOC_VANILLA | rPcRel | rExtern | rLength2:
    // ex: callw _foo (and _foo undefined)
    *kind = branch16;
    if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
      return ec;
    *addend = fixupAddress + 2 + (int16_t)*(const little16_t *)fixupContent;
    break;
  case GENERIC_RELOC_VANILLA | rPcRel | rLength2:
    // ex: callw _foo (and _foo defined)
    *kind = branch16;
    targetAddress =
        fixupAddress + 2 + (int16_t)*(const little16_t *)fixupContent;
    return atomFromAddress(reloc.symbol, targetAddress, target, addend);
  case GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength2:
    // ex: callw _foo+n (and _foo defined)
    *kind = branch16;
    targetAddress =
        fixupAddress + 2 + (int16_t)*(const little16_t *)fixupContent;
    if (auto ec = atomFromAddress(0, reloc.value, target, addend))
      return ec;
    *addend = targetAddress - reloc.value;
    break;
  case GENERIC_RELOC_VANILLA | rExtern | rLength4:
    // ex: movl _foo, %eax (and _foo undefined)
    // ex: .long _foo (and _foo undefined)
    perms = inAtom->permissions();
    *kind = ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X)
                ? abs32 : pointer32;
    if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
      return ec;
    *addend = *(const ulittle32_t *)fixupContent;
    break;
  case GENERIC_RELOC_VANILLA | rLength4:
    // ex: movl _foo, %eax (and _foo defined)
    // ex: .long _foo (and _foo defined)
    perms = inAtom->permissions();
    *kind = ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X)
                ? abs32 : pointer32;
    targetAddress = *(const ulittle32_t *)fixupContent;
    return atomFromAddress(reloc.symbol, targetAddress, target, addend);
  case GENERIC_RELOC_VANILLA | rScattered | rLength4:
    // ex: .long _foo+n (and _foo defined)
    perms = inAtom->permissions();
    *kind = ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X)
                ? abs32 : pointer32;
    if (auto ec = atomFromAddress(0, reloc.value, target, addend))
      return ec;
    *addend = *(const ulittle32_t *)fixupContent - reloc.value;
    break;
  default:
    return llvm::make_error<GenericError>("unsupported i386 relocation type");
  }
  return llvm::Error::success();
}

llvm::Error
ArchHandler_x86::getPairReferenceInfo(const normalized::Relocation &reloc1,
                                      const normalized::Relocation &reloc2,
                                      const DefinedAtom *inAtom,
                                      uint32_t offsetInAtom,
                                      uint64_t fixupAddress, bool swap,
                                      bool scatterable,
                                      FindAtomBySectionAndAddress atomFromAddr,
                                      FindAtomBySymbolIndex atomFromSymbolIndex,
                                      Reference::KindValue *kind,
                                      const lld::Atom **target,
                                      Reference::Addend *addend) {
  const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom];
  DefinedAtom::ContentPermissions perms = inAtom->permissions();
  uint32_t fromAddress;
  uint32_t toAddress;
  uint32_t value;
  const lld::Atom *fromTarget;
  Reference::Addend offsetInTo;
  Reference::Addend offsetInFrom;
  switch (relocPattern(reloc1) << 16 | relocPattern(reloc2)) {
  case ((GENERIC_RELOC_SECTDIFF | rScattered | rLength4) << 16 |
        GENERIC_RELOC_PAIR | rScattered | rLength4):
  case ((GENERIC_RELOC_LOCAL_SECTDIFF | rScattered | rLength4) << 16 |
        GENERIC_RELOC_PAIR | rScattered | rLength4):
    toAddress = reloc1.value;
    fromAddress = reloc2.value;
    value = *(const little32_t *)fixupContent;
    if (auto ec = atomFromAddr(0, toAddress, target, &offsetInTo))
      return ec;
    if (auto ec = atomFromAddr(0, fromAddress, &fromTarget, &offsetInFrom))
      return ec;
    if (fromTarget != inAtom) {
      if (*target != inAtom)
        return llvm::make_error<GenericError>(
            "SECTDIFF relocation where neither target is in atom");
      *kind = negDelta32;
      *addend = toAddress - value - fromAddress;
      *target = fromTarget;
    } else {
      if ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) {
        // SECTDIFF relocations are used in i386 codegen where the function
        // prolog does a CALL to the next instruction which POPs the return
        // address into EBX, which becomes the pic-base register.  The label
        // on the POP instruction is used as the subtrahend in expressions.
        // The funcRel32 kind represents the 32-bit delta to some symbol from
        // the start of the function (atom) containing the funcRel32.
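        //
        // For illustration, the pattern looks roughly like:
        //
        //   _f:  call  L1           ; pushes the address of L1
        //   L1:  popl  %ebx         ; %ebx = pic base (address of L1)
        //        movl  _foo-L1(%ebx), %eax
        //
        // The _foo-L1 expression is what the SECTDIFF/PAIR pair encodes.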
        *kind = funcRel32;
        uint32_t ta = fromAddress + value - toAddress;
        *addend = ta - offsetInFrom;
      } else {
        *kind = delta32;
        *addend = fromAddress + value - toAddress;
      }
    }
    return llvm::Error::success();
  default:
    return llvm::make_error<GenericError>("unsupported i386 relocation type");
  }
}

void ArchHandler_x86::generateAtomContent(
    const DefinedAtom &atom, bool relocatable, FindAddressForAtom findAddress,
    FindAddressForAtom findSectionAddress, uint64_t imageBaseAddress,
    llvm::MutableArrayRef<uint8_t> atomContentBuffer) {
  // Copy raw bytes.
  std::copy(atom.rawContent().begin(), atom.rawContent().end(),
            atomContentBuffer.begin());
  // Apply fix-ups.
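  // Note: when emitting a relocatable object, pc-rel branches that use
  // external relocations are written without a resolved target (just the
  // addend biased by the fixup address); other kinds are written as in a
  // final link and are described by local/scattered section relocations.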
  for (const Reference *ref : atom) {
    uint32_t offset = ref->offsetInAtom();
    const Atom *target = ref->target();
    uint64_t targetAddress = 0;
    if (isa<DefinedAtom>(target))
      targetAddress = findAddress(*target);
    uint64_t atomAddress = findAddress(atom);
    uint64_t fixupAddress = atomAddress + offset;
    if (relocatable) {
      applyFixupRelocatable(*ref, &atomContentBuffer[offset], fixupAddress,
                            targetAddress, atomAddress);
    } else {
      applyFixupFinal(*ref, &atomContentBuffer[offset], fixupAddress,
                      targetAddress, atomAddress);
    }
  }
}

void ArchHandler_x86::applyFixupFinal(const Reference &ref, uint8_t *loc,
                                      uint64_t fixupAddress,
                                      uint64_t targetAddress,
                                      uint64_t inAtomAddress) {
  if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
    return;
  assert(ref.kindArch() == Reference::KindArch::x86);
  ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc);
  switch (static_cast<X86Kind>(ref.kindValue())) {
  case branch32:
    *loc32 = (targetAddress - (fixupAddress + 4)) + ref.addend();
    break;
  case branch16:
    *loc32 = (targetAddress - (fixupAddress + 2)) + ref.addend();
    break;
  case pointer32:
  case abs32:
    *loc32 = targetAddress + ref.addend();
    break;
  case funcRel32:
    *loc32 = targetAddress - inAtomAddress + ref.addend();
    break;
  case delta32:
    *loc32 = targetAddress - fixupAddress + ref.addend();
    break;
  case negDelta32:
    *loc32 = fixupAddress - targetAddress + ref.addend();
    break;
  case modeCode:
  case modeData:
  case lazyPointer:
    // do nothing
    break;
  case lazyImmediateLocation:
    *loc32 = ref.addend();
    break;
  case invalid:
    llvm_unreachable("invalid x86 Reference Kind");
  }
}

void ArchHandler_x86::applyFixupRelocatable(const Reference &ref, uint8_t *loc,
                                            uint64_t fixupAddress,
                                            uint64_t targetAddress,
                                            uint64_t inAtomAddress) {
  if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
    return;
  assert(ref.kindArch() == Reference::KindArch::x86);
  bool useExternalReloc = useExternalRelocationTo(*ref.target());
  ulittle16_t *loc16 = reinterpret_cast<ulittle16_t *>(loc);
  ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc);
  switch (static_cast<X86Kind>(ref.kindValue())) {
  case branch32:
    if (useExternalReloc)
      *loc32 = ref.addend() - (fixupAddress + 4);
    else
      *loc32 = (targetAddress - (fixupAddress + 4)) + ref.addend();
    break;
  case branch16:
    if (useExternalReloc)
      *loc16 = ref.addend() - (fixupAddress + 2);
    else
      *loc16 = (targetAddress - (fixupAddress + 2)) + ref.addend();
    break;
  case pointer32:
  case abs32:
    *loc32 = targetAddress + ref.addend();
    break;
  case funcRel32:
    *loc32 = targetAddress - inAtomAddress + ref.addend(); // FIXME
    break;
  case delta32:
    *loc32 = targetAddress - fixupAddress + ref.addend();
    break;
  case negDelta32:
    *loc32 = fixupAddress - targetAddress + ref.addend();
    break;
  case modeCode:
  case modeData:
  case lazyPointer:
  case lazyImmediateLocation:
    // do nothing
    break;
  case invalid:
    llvm_unreachable("invalid x86 Reference Kind");
  }
}

bool ArchHandler_x86::useExternalRelocationTo(const Atom &target) {
  // Undefined symbols are referenced via external relocations.
  if (isa<UndefinedAtom>(&target))
    return true;
  if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(&target)) {
    switch (defAtom->merge()) {
    case DefinedAtom::mergeAsTentative:
      // Tentative definitions are referenced via external relocations.
      return true;
    case DefinedAtom::mergeAsWeak:
    case DefinedAtom::mergeAsWeakAndAddressUsed:
      // Global weak-defs are referenced via external relocations.
      return (defAtom->scope() == DefinedAtom::scopeGlobal);
    default:
      break;
    }
  }
  // Everything else is referenced via an internal relocation.
  return false;
}

void ArchHandler_x86::appendSectionRelocations(
    const DefinedAtom &atom, uint64_t atomSectionOffset, const Reference &ref,
    FindSymbolIndexForAtom symbolIndexForAtom,
    FindSectionIndexForAtom sectionIndexForAtom,
    FindAddressForAtom addressForAtom, normalized::Relocations &relocs) {
  if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
    return;
  assert(ref.kindArch() == Reference::KindArch::x86);
  uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom();
  bool useExternalReloc = useExternalRelocationTo(*ref.target());
  switch (static_cast<X86Kind>(ref.kindValue())) {
  case modeCode:
  case modeData:
    break;
  case branch32:
    if (useExternalReloc) {
      appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
                  GENERIC_RELOC_VANILLA | rExtern | rPcRel | rLength4);
    } else {
      if (ref.addend() != 0)
        appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
                    GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength4);
      else
        appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),
                    0, GENERIC_RELOC_VANILLA | rPcRel | rLength4);
    }
    break;
  case branch16:
    if (useExternalReloc) {
      appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
                  GENERIC_RELOC_VANILLA | rExtern | rPcRel | rLength2);
    } else {
      if (ref.addend() != 0)
        appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
                    GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength2);
      else
        appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),
                    0, GENERIC_RELOC_VANILLA | rPcRel | rLength2);
    }
    break;
  case pointer32:
  case abs32:
    if (useExternalReloc)
      appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
                  GENERIC_RELOC_VANILLA | rExtern | rLength4);
    else {
      if (ref.addend() != 0)
        appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
                    GENERIC_RELOC_VANILLA | rScattered | rLength4);
      else
        appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),
                    0, GENERIC_RELOC_VANILLA | rLength4);
    }
    break;
  case funcRel32:
    appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
                GENERIC_RELOC_SECTDIFF | rScattered | rLength4);
    appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) - ref.addend(),
                GENERIC_RELOC_PAIR | rScattered | rLength4);
    break;
  case delta32:
    appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
                GENERIC_RELOC_SECTDIFF | rScattered | rLength4);
    appendReloc(relocs, sectionOffset, 0,
                addressForAtom(atom) + ref.offsetInAtom(),
                GENERIC_RELOC_PAIR | rScattered | rLength4);
    break;
  case negDelta32:
    appendReloc(relocs, sectionOffset, 0,
                addressForAtom(atom) + ref.offsetInAtom(),
                GENERIC_RELOC_SECTDIFF | rScattered | rLength4);
    appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
                GENERIC_RELOC_PAIR | rScattered | rLength4);
    break;
  case lazyPointer:
  case lazyImmediateLocation:
    llvm_unreachable("lazy reference kind implies Stubs pass was run");
  case invalid:
    llvm_unreachable("unknown x86 Reference Kind");
  }
}

std::unique_ptr<mach_o::ArchHandler> ArchHandler::create_x86() {
  return std::unique_ptr<mach_o::ArchHandler>(new ArchHandler_x86());
}

} // namespace mach_o
} // namespace lld