1//===- lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp ------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9/// 10/// \file Converts from in-memory Atoms to in-memory normalized mach-o. 11/// 12/// +------------+ 13/// | normalized | 14/// +------------+ 15/// ^ 16/// | 17/// | 18/// +-------+ 19/// | Atoms | 20/// +-------+ 21 22#include "ArchHandler.h" 23#include "DebugInfo.h" 24#include "MachONormalizedFile.h" 25#include "MachONormalizedFileBinaryUtils.h" 26#include "lld/Common/LLVM.h" 27#include "lld/Core/Error.h" 28#include "llvm/ADT/StringRef.h" 29#include "llvm/ADT/StringSwitch.h" 30#include "llvm/BinaryFormat/MachO.h" 31#include "llvm/Support/Casting.h" 32#include "llvm/Support/Debug.h" 33#include "llvm/Support/ErrorHandling.h" 34#include "llvm/Support/Format.h" 35#include <map> 36#include <system_error> 37#include <unordered_set> 38 39using llvm::StringRef; 40using llvm::isa; 41using namespace llvm::MachO; 42using namespace lld::mach_o::normalized; 43using namespace lld; 44 45namespace { 46 47struct AtomInfo { 48 const DefinedAtom *atom; 49 uint64_t offsetInSection; 50}; 51 52struct SectionInfo { 53 SectionInfo(StringRef seg, StringRef sect, SectionType type, 54 const MachOLinkingContext &ctxt, uint32_t attr, 55 bool relocsToDefinedCanBeImplicit); 56 57 StringRef segmentName; 58 StringRef sectionName; 59 SectionType type; 60 uint32_t attributes; 61 uint64_t address; 62 uint64_t size; 63 uint16_t alignment; 64 65 /// If this is set, the any relocs in this section which point to defined 66 /// addresses can be implicitly generated. This is the case for the 67 /// __eh_frame section where references to the function can be implicit if the 68 /// function is defined. 69 bool relocsToDefinedCanBeImplicit; 70 71 72 std::vector<AtomInfo> atomsAndOffsets; 73 uint32_t normalizedSectionIndex; 74 uint32_t finalSectionIndex; 75}; 76 77SectionInfo::SectionInfo(StringRef sg, StringRef sct, SectionType t, 78 const MachOLinkingContext &ctxt, uint32_t attrs, 79 bool relocsToDefinedCanBeImplicit) 80 : segmentName(sg), sectionName(sct), type(t), attributes(attrs), 81 address(0), size(0), alignment(1), 82 relocsToDefinedCanBeImplicit(relocsToDefinedCanBeImplicit), 83 normalizedSectionIndex(0), finalSectionIndex(0) { 84 uint16_t align = 1; 85 if (ctxt.sectionAligned(segmentName, sectionName, align)) { 86 alignment = align; 87 } 88} 89 90struct SegmentInfo { 91 SegmentInfo(StringRef name); 92 93 StringRef name; 94 uint64_t address; 95 uint64_t size; 96 uint32_t init_access; 97 uint32_t max_access; 98 std::vector<SectionInfo*> sections; 99 uint32_t normalizedSegmentIndex; 100}; 101 102SegmentInfo::SegmentInfo(StringRef n) 103 : name(n), address(0), size(0), init_access(0), max_access(0), 104 normalizedSegmentIndex(0) { 105} 106 107class Util { 108public: 109 Util(const MachOLinkingContext &ctxt) 110 : _ctx(ctxt), _archHandler(ctxt.archHandler()), _entryAtom(nullptr), 111 _hasTLVDescriptors(false), _subsectionsViaSymbols(true) {} 112 ~Util(); 113 114 void processDefinedAtoms(const lld::File &atomFile); 115 void processAtomAttributes(const DefinedAtom *atom); 116 void assignAtomToSection(const DefinedAtom *atom); 117 void organizeSections(); 118 void assignAddressesToSections(const NormalizedFile &file); 119 uint32_t fileFlags(); 120 void copySegmentInfo(NormalizedFile &file); 121 void copySectionInfo(NormalizedFile &file); 122 void updateSectionInfo(NormalizedFile &file); 123 void buildAtomToAddressMap(); 124 llvm::Error synthesizeDebugNotes(NormalizedFile &file); 125 llvm::Error addSymbols(const lld::File &atomFile, NormalizedFile &file); 126 void addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file); 127 void addRebaseAndBindingInfo(const lld::File &, NormalizedFile &file); 128 void addExportInfo(const lld::File &, NormalizedFile &file); 129 void addSectionRelocs(const lld::File &, NormalizedFile &file); 130 void addFunctionStarts(const lld::File &, NormalizedFile &file); 131 void buildDataInCodeArray(const lld::File &, NormalizedFile &file); 132 void addDependentDylibs(const lld::File &, NormalizedFile &file); 133 void copyEntryPointAddress(NormalizedFile &file); 134 void copySectionContent(NormalizedFile &file); 135 136 bool allSourceFilesHaveMinVersions() const { 137 return _allSourceFilesHaveMinVersions; 138 } 139 140 uint32_t minVersion() const { 141 return _minVersion; 142 } 143 144 LoadCommandType minVersionCommandType() const { 145 return _minVersionCommandType; 146 } 147 148private: 149 typedef std::map<DefinedAtom::ContentType, SectionInfo*> TypeToSection; 150 typedef llvm::DenseMap<const Atom*, uint64_t> AtomToAddress; 151 152 struct DylibInfo { int ordinal; bool hasWeak; bool hasNonWeak; }; 153 typedef llvm::StringMap<DylibInfo> DylibPathToInfo; 154 155 SectionInfo *sectionForAtom(const DefinedAtom*); 156 SectionInfo *getRelocatableSection(DefinedAtom::ContentType type); 157 SectionInfo *getFinalSection(DefinedAtom::ContentType type); 158 void appendAtom(SectionInfo *sect, const DefinedAtom *atom); 159 SegmentInfo *segmentForName(StringRef segName); 160 void layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr); 161 void layoutSectionsInTextSegment(size_t, SegmentInfo *, uint64_t &); 162 void copySectionContent(SectionInfo *si, ContentBytes &content); 163 uint16_t descBits(const DefinedAtom* atom); 164 int dylibOrdinal(const SharedLibraryAtom *sa); 165 void segIndexForSection(const SectionInfo *sect, 166 uint8_t &segmentIndex, uint64_t &segmentStartAddr); 167 const Atom *targetOfLazyPointer(const DefinedAtom *lpAtom); 168 const Atom *targetOfStub(const DefinedAtom *stubAtom); 169 llvm::Error getSymbolTableRegion(const DefinedAtom* atom, 170 bool &inGlobalsRegion, 171 SymbolScope &symbolScope); 172 void appendSection(SectionInfo *si, NormalizedFile &file); 173 uint32_t sectionIndexForAtom(const Atom *atom); 174 void fixLazyReferenceImm(const DefinedAtom *atom, uint32_t offset, 175 NormalizedFile &file); 176 177 typedef llvm::DenseMap<const Atom*, uint32_t> AtomToIndex; 178 struct AtomAndIndex { const Atom *atom; uint32_t index; SymbolScope scope; }; 179 struct AtomSorter { 180 bool operator()(const AtomAndIndex &left, const AtomAndIndex &right); 181 }; 182 struct SegmentSorter { 183 bool operator()(const SegmentInfo *left, const SegmentInfo *right); 184 static unsigned weight(const SegmentInfo *); 185 }; 186 struct TextSectionSorter { 187 bool operator()(const SectionInfo *left, const SectionInfo *right); 188 static unsigned weight(const SectionInfo *); 189 }; 190 191 const MachOLinkingContext &_ctx; 192 mach_o::ArchHandler &_archHandler; 193 llvm::BumpPtrAllocator _allocator; 194 std::vector<SectionInfo*> _sectionInfos; 195 std::vector<SegmentInfo*> _segmentInfos; 196 TypeToSection _sectionMap; 197 std::vector<SectionInfo*> _customSections; 198 AtomToAddress _atomToAddress; 199 DylibPathToInfo _dylibInfo; 200 const DefinedAtom *_entryAtom; 201 AtomToIndex _atomToSymbolIndex; 202 std::vector<const Atom *> _machHeaderAliasAtoms; 203 bool _hasTLVDescriptors; 204 bool _subsectionsViaSymbols; 205 bool _allSourceFilesHaveMinVersions = true; 206 LoadCommandType _minVersionCommandType = (LoadCommandType)0; 207 uint32_t _minVersion = 0; 208 std::vector<lld::mach_o::Stab> _stabs; 209}; 210 211Util::~Util() { 212 // The SectionInfo structs are BumpPtr allocated, but atomsAndOffsets needs 213 // to be deleted. 214 for (SectionInfo *si : _sectionInfos) { 215 // clear() destroys vector elements, but does not deallocate. 216 // Instead use swap() to deallocate vector buffer. 217 std::vector<AtomInfo> empty; 218 si->atomsAndOffsets.swap(empty); 219 } 220 // The SegmentInfo structs are BumpPtr allocated, but sections needs 221 // to be deleted. 222 for (SegmentInfo *sgi : _segmentInfos) { 223 std::vector<SectionInfo*> empty2; 224 sgi->sections.swap(empty2); 225 } 226} 227 228SectionInfo *Util::getRelocatableSection(DefinedAtom::ContentType type) { 229 StringRef segmentName; 230 StringRef sectionName; 231 SectionType sectionType; 232 SectionAttr sectionAttrs; 233 bool relocsToDefinedCanBeImplicit; 234 235 // Use same table used by when parsing .o files. 236 relocatableSectionInfoForContentType(type, segmentName, sectionName, 237 sectionType, sectionAttrs, 238 relocsToDefinedCanBeImplicit); 239 // If we already have a SectionInfo with this name, re-use it. 240 // This can happen if two ContentType map to the same mach-o section. 241 for (auto sect : _sectionMap) { 242 if (sect.second->sectionName.equals(sectionName) && 243 sect.second->segmentName.equals(segmentName)) { 244 return sect.second; 245 } 246 } 247 // Otherwise allocate new SectionInfo object. 248 auto *sect = new (_allocator) 249 SectionInfo(segmentName, sectionName, sectionType, _ctx, sectionAttrs, 250 relocsToDefinedCanBeImplicit); 251 _sectionInfos.push_back(sect); 252 _sectionMap[type] = sect; 253 return sect; 254} 255 256#define ENTRY(seg, sect, type, atomType) \ 257 {seg, sect, type, DefinedAtom::atomType } 258 259struct MachOFinalSectionFromAtomType { 260 StringRef segmentName; 261 StringRef sectionName; 262 SectionType sectionType; 263 DefinedAtom::ContentType atomType; 264}; 265 266const MachOFinalSectionFromAtomType sectsToAtomType[] = { 267 ENTRY("__TEXT", "__text", S_REGULAR, typeCode), 268 ENTRY("__TEXT", "__text", S_REGULAR, typeMachHeader), 269 ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString), 270 ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String), 271 ENTRY("__TEXT", "__const", S_REGULAR, typeConstant), 272 ENTRY("__TEXT", "__const", S_4BYTE_LITERALS, typeLiteral4), 273 ENTRY("__TEXT", "__const", S_8BYTE_LITERALS, typeLiteral8), 274 ENTRY("__TEXT", "__const", S_16BYTE_LITERALS, typeLiteral16), 275 ENTRY("__TEXT", "__stubs", S_SYMBOL_STUBS, typeStub), 276 ENTRY("__TEXT", "__stub_helper", S_REGULAR, typeStubHelper), 277 ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA), 278 ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI), 279 ENTRY("__TEXT", "__unwind_info", S_REGULAR, typeProcessedUnwindInfo), 280 ENTRY("__DATA", "__data", S_REGULAR, typeData), 281 ENTRY("__DATA", "__const", S_REGULAR, typeConstData), 282 ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString), 283 ENTRY("__DATA", "__la_symbol_ptr", S_LAZY_SYMBOL_POINTERS, 284 typeLazyPointer), 285 ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS, 286 typeInitializerPtr), 287 ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS, 288 typeTerminatorPtr), 289 ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS, 290 typeGOT), 291 ENTRY("__DATA", "__nl_symbol_ptr", S_NON_LAZY_SYMBOL_POINTERS, 292 typeNonLazyPointer), 293 ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES, 294 typeThunkTLV), 295 ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, 296 typeTLVInitialData), 297 ENTRY("__DATA", "__thread_ptrs", S_THREAD_LOCAL_VARIABLE_POINTERS, 298 typeTLVInitializerPtr), 299 ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL, 300 typeTLVInitialZeroFill), 301 ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill), 302 ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples), 303}; 304#undef ENTRY 305 306SectionInfo *Util::getFinalSection(DefinedAtom::ContentType atomType) { 307 for (auto &p : sectsToAtomType) { 308 if (p.atomType != atomType) 309 continue; 310 SectionAttr sectionAttrs = 0; 311 switch (atomType) { 312 case DefinedAtom::typeMachHeader: 313 case DefinedAtom::typeCode: 314 case DefinedAtom::typeStub: 315 case DefinedAtom::typeStubHelper: 316 sectionAttrs = S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS; 317 break; 318 case DefinedAtom::typeThunkTLV: 319 _hasTLVDescriptors = true; 320 break; 321 default: 322 break; 323 } 324 // If we already have a SectionInfo with this name, re-use it. 325 // This can happen if two ContentType map to the same mach-o section. 326 for (auto sect : _sectionMap) { 327 if (sect.second->sectionName.equals(p.sectionName) && 328 sect.second->segmentName.equals(p.segmentName)) { 329 return sect.second; 330 } 331 } 332 // Otherwise allocate new SectionInfo object. 333 auto *sect = new (_allocator) SectionInfo( 334 p.segmentName, p.sectionName, p.sectionType, _ctx, sectionAttrs, 335 /* relocsToDefinedCanBeImplicit */ false); 336 _sectionInfos.push_back(sect); 337 _sectionMap[atomType] = sect; 338 return sect; 339 } 340 llvm_unreachable("content type not yet supported"); 341} 342 343SectionInfo *Util::sectionForAtom(const DefinedAtom *atom) { 344 if (atom->sectionChoice() == DefinedAtom::sectionBasedOnContent) { 345 // Section for this atom is derived from content type. 346 DefinedAtom::ContentType type = atom->contentType(); 347 auto pos = _sectionMap.find(type); 348 if ( pos != _sectionMap.end() ) 349 return pos->second; 350 bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); 351 return rMode ? getRelocatableSection(type) : getFinalSection(type); 352 } else { 353 // This atom needs to be in a custom section. 354 StringRef customName = atom->customSectionName(); 355 // Look to see if we have already allocated the needed custom section. 356 for(SectionInfo *sect : _customSections) { 357 const DefinedAtom *firstAtom = sect->atomsAndOffsets.front().atom; 358 if (firstAtom->customSectionName().equals(customName)) { 359 return sect; 360 } 361 } 362 // Not found, so need to create a new custom section. 363 size_t seperatorIndex = customName.find('/'); 364 assert(seperatorIndex != StringRef::npos); 365 StringRef segName = customName.slice(0, seperatorIndex); 366 StringRef sectName = customName.drop_front(seperatorIndex + 1); 367 auto *sect = 368 new (_allocator) SectionInfo(segName, sectName, S_REGULAR, _ctx, 369 0, /* relocsToDefinedCanBeImplicit */ false); 370 _customSections.push_back(sect); 371 _sectionInfos.push_back(sect); 372 return sect; 373 } 374} 375 376void Util::appendAtom(SectionInfo *sect, const DefinedAtom *atom) { 377 // Figure out offset for atom in this section given alignment constraints. 378 uint64_t offset = sect->size; 379 DefinedAtom::Alignment atomAlign = atom->alignment(); 380 uint64_t align = atomAlign.value; 381 uint64_t requiredModulus = atomAlign.modulus; 382 uint64_t currentModulus = (offset % align); 383 if ( currentModulus != requiredModulus ) { 384 if ( requiredModulus > currentModulus ) 385 offset += requiredModulus-currentModulus; 386 else 387 offset += align+requiredModulus-currentModulus; 388 } 389 // Record max alignment of any atom in this section. 390 if (align > sect->alignment) 391 sect->alignment = atomAlign.value; 392 // Assign atom to this section with this offset. 393 AtomInfo ai = {atom, offset}; 394 sect->atomsAndOffsets.push_back(ai); 395 // Update section size to include this atom. 396 sect->size = offset + atom->size(); 397} 398 399void Util::processDefinedAtoms(const lld::File &atomFile) { 400 for (const DefinedAtom *atom : atomFile.defined()) { 401 processAtomAttributes(atom); 402 assignAtomToSection(atom); 403 } 404} 405 406void Util::processAtomAttributes(const DefinedAtom *atom) { 407 if (auto *machoFile = dyn_cast<mach_o::MachOFile>(&atom->file())) { 408 // If the file doesn't use subsections via symbols, then make sure we don't 409 // add that flag to the final output file if we have a relocatable file. 410 if (!machoFile->subsectionsViaSymbols()) 411 _subsectionsViaSymbols = false; 412 413 // All the source files must have min versions for us to output an object 414 // file with a min version. 415 if (auto v = machoFile->minVersion()) 416 _minVersion = std::max(_minVersion, v); 417 else 418 _allSourceFilesHaveMinVersions = false; 419 420 // If we don't have a platform load command, but one of the source files 421 // does, then take the one from the file. 422 if (!_minVersionCommandType) 423 if (auto v = machoFile->minVersionLoadCommandKind()) 424 _minVersionCommandType = v; 425 } 426} 427 428void Util::assignAtomToSection(const DefinedAtom *atom) { 429 if (atom->contentType() == DefinedAtom::typeMachHeader) { 430 _machHeaderAliasAtoms.push_back(atom); 431 // Assign atom to this section with this offset. 432 AtomInfo ai = {atom, 0}; 433 sectionForAtom(atom)->atomsAndOffsets.push_back(ai); 434 } else if (atom->contentType() == DefinedAtom::typeDSOHandle) 435 _machHeaderAliasAtoms.push_back(atom); 436 else 437 appendAtom(sectionForAtom(atom), atom); 438} 439 440SegmentInfo *Util::segmentForName(StringRef segName) { 441 for (SegmentInfo *si : _segmentInfos) { 442 if ( si->name.equals(segName) ) 443 return si; 444 } 445 auto *info = new (_allocator) SegmentInfo(segName); 446 447 // Set the initial segment protection. 448 if (segName.equals("__TEXT")) 449 info->init_access = VM_PROT_READ | VM_PROT_EXECUTE; 450 else if (segName.equals("__PAGEZERO")) 451 info->init_access = 0; 452 else if (segName.equals("__LINKEDIT")) 453 info->init_access = VM_PROT_READ; 454 else { 455 // All others default to read-write 456 info->init_access = VM_PROT_READ | VM_PROT_WRITE; 457 } 458 459 // Set max segment protection 460 // Note, its overkill to use a switch statement here, but makes it so much 461 // easier to use switch coverage to catch new cases. 462 switch (_ctx.os()) { 463 case lld::MachOLinkingContext::OS::unknown: 464 case lld::MachOLinkingContext::OS::macOSX: 465 case lld::MachOLinkingContext::OS::iOS_simulator: 466 if (segName.equals("__PAGEZERO")) { 467 info->max_access = 0; 468 break; 469 } 470 // All others default to all 471 info->max_access = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; 472 break; 473 case lld::MachOLinkingContext::OS::iOS: 474 // iPhoneOS always uses same protection for max and initial 475 info->max_access = info->init_access; 476 break; 477 } 478 _segmentInfos.push_back(info); 479 return info; 480} 481 482unsigned Util::SegmentSorter::weight(const SegmentInfo *seg) { 483 return llvm::StringSwitch<unsigned>(seg->name) 484 .Case("__PAGEZERO", 1) 485 .Case("__TEXT", 2) 486 .Case("__DATA", 3) 487 .Default(100); 488} 489 490bool Util::SegmentSorter::operator()(const SegmentInfo *left, 491 const SegmentInfo *right) { 492 return (weight(left) < weight(right)); 493} 494 495unsigned Util::TextSectionSorter::weight(const SectionInfo *sect) { 496 return llvm::StringSwitch<unsigned>(sect->sectionName) 497 .Case("__text", 1) 498 .Case("__stubs", 2) 499 .Case("__stub_helper", 3) 500 .Case("__const", 4) 501 .Case("__cstring", 5) 502 .Case("__unwind_info", 98) 503 .Case("__eh_frame", 99) 504 .Default(10); 505} 506 507bool Util::TextSectionSorter::operator()(const SectionInfo *left, 508 const SectionInfo *right) { 509 return (weight(left) < weight(right)); 510} 511 512void Util::organizeSections() { 513 // NOTE!: Keep this in sync with assignAddressesToSections. 514 switch (_ctx.outputMachOType()) { 515 case llvm::MachO::MH_EXECUTE: 516 // Main executables, need a zero-page segment 517 segmentForName("__PAGEZERO"); 518 // Fall into next case. 519 LLVM_FALLTHROUGH; 520 case llvm::MachO::MH_DYLIB: 521 case llvm::MachO::MH_BUNDLE: 522 // All dynamic code needs TEXT segment to hold the load commands. 523 segmentForName("__TEXT"); 524 break; 525 default: 526 break; 527 } 528 segmentForName("__LINKEDIT"); 529 530 // Group sections into segments. 531 for (SectionInfo *si : _sectionInfos) { 532 SegmentInfo *seg = segmentForName(si->segmentName); 533 seg->sections.push_back(si); 534 } 535 // Sort segments. 536 std::sort(_segmentInfos.begin(), _segmentInfos.end(), SegmentSorter()); 537 538 // Sort sections within segments. 539 for (SegmentInfo *seg : _segmentInfos) { 540 if (seg->name.equals("__TEXT")) { 541 std::sort(seg->sections.begin(), seg->sections.end(), 542 TextSectionSorter()); 543 } 544 } 545 546 // Record final section indexes. 547 uint32_t segmentIndex = 0; 548 uint32_t sectionIndex = 1; 549 for (SegmentInfo *seg : _segmentInfos) { 550 seg->normalizedSegmentIndex = segmentIndex++; 551 for (SectionInfo *sect : seg->sections) 552 sect->finalSectionIndex = sectionIndex++; 553 } 554} 555 556void Util::layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr) { 557 seg->address = addr; 558 for (SectionInfo *sect : seg->sections) { 559 sect->address = llvm::alignTo(addr, sect->alignment); 560 addr = sect->address + sect->size; 561 } 562 seg->size = llvm::alignTo(addr - seg->address, _ctx.pageSize()); 563} 564 565// __TEXT segment lays out backwards so padding is at front after load commands. 566void Util::layoutSectionsInTextSegment(size_t hlcSize, SegmentInfo *seg, 567 uint64_t &addr) { 568 seg->address = addr; 569 // Walks sections starting at end to calculate padding for start. 570 int64_t taddr = 0; 571 for (auto it = seg->sections.rbegin(); it != seg->sections.rend(); ++it) { 572 SectionInfo *sect = *it; 573 taddr -= sect->size; 574 taddr = taddr & (0 - sect->alignment); 575 } 576 int64_t padding = taddr - hlcSize; 577 while (padding < 0) 578 padding += _ctx.pageSize(); 579 // Start assigning section address starting at padded offset. 580 addr += (padding + hlcSize); 581 for (SectionInfo *sect : seg->sections) { 582 sect->address = llvm::alignTo(addr, sect->alignment); 583 addr = sect->address + sect->size; 584 } 585 seg->size = llvm::alignTo(addr - seg->address, _ctx.pageSize()); 586} 587 588void Util::assignAddressesToSections(const NormalizedFile &file) { 589 // NOTE!: Keep this in sync with organizeSections. 590 size_t hlcSize = headerAndLoadCommandsSize(file, 591 _ctx.generateFunctionStartsLoadCommand()); 592 uint64_t address = 0; 593 for (SegmentInfo *seg : _segmentInfos) { 594 if (seg->name.equals("__PAGEZERO")) { 595 seg->size = _ctx.pageZeroSize(); 596 address += seg->size; 597 } 598 else if (seg->name.equals("__TEXT")) { 599 // _ctx.baseAddress() == 0 implies it was either unspecified or 600 // pageZeroSize is also 0. In either case resetting address is safe. 601 address = _ctx.baseAddress() ? _ctx.baseAddress() : address; 602 layoutSectionsInTextSegment(hlcSize, seg, address); 603 } else 604 layoutSectionsInSegment(seg, address); 605 606 address = llvm::alignTo(address, _ctx.pageSize()); 607 } 608 DEBUG_WITH_TYPE("WriterMachO-norm", 609 llvm::dbgs() << "assignAddressesToSections()\n"; 610 for (SegmentInfo *sgi : _segmentInfos) { 611 llvm::dbgs() << " address=" << llvm::format("0x%08llX", sgi->address) 612 << ", size=" << llvm::format("0x%08llX", sgi->size) 613 << ", segment-name='" << sgi->name 614 << "'\n"; 615 for (SectionInfo *si : sgi->sections) { 616 llvm::dbgs()<< " addr=" << llvm::format("0x%08llX", si->address) 617 << ", size=" << llvm::format("0x%08llX", si->size) 618 << ", section-name='" << si->sectionName 619 << "\n"; 620 } 621 } 622 ); 623} 624 625void Util::copySegmentInfo(NormalizedFile &file) { 626 for (SegmentInfo *sgi : _segmentInfos) { 627 Segment seg; 628 seg.name = sgi->name; 629 seg.address = sgi->address; 630 seg.size = sgi->size; 631 seg.init_access = sgi->init_access; 632 seg.max_access = sgi->max_access; 633 file.segments.push_back(seg); 634 } 635} 636 637void Util::appendSection(SectionInfo *si, NormalizedFile &file) { 638 // Add new empty section to end of file.sections. 639 Section temp; 640 file.sections.push_back(std::move(temp)); 641 Section* normSect = &file.sections.back(); 642 // Copy fields to normalized section. 643 normSect->segmentName = si->segmentName; 644 normSect->sectionName = si->sectionName; 645 normSect->type = si->type; 646 normSect->attributes = si->attributes; 647 normSect->address = si->address; 648 normSect->alignment = si->alignment; 649 // Record where normalized section is. 650 si->normalizedSectionIndex = file.sections.size()-1; 651} 652 653void Util::copySectionContent(NormalizedFile &file) { 654 const bool r = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); 655 656 // Utility function for ArchHandler to find address of atom in output file. 657 auto addrForAtom = [&] (const Atom &atom) -> uint64_t { 658 auto pos = _atomToAddress.find(&atom); 659 assert(pos != _atomToAddress.end()); 660 return pos->second; 661 }; 662 663 auto sectionAddrForAtom = [&] (const Atom &atom) -> uint64_t { 664 for (const SectionInfo *sectInfo : _sectionInfos) 665 for (const AtomInfo &atomInfo : sectInfo->atomsAndOffsets) 666 if (atomInfo.atom == &atom) 667 return sectInfo->address; 668 llvm_unreachable("atom not assigned to section"); 669 }; 670 671 for (SectionInfo *si : _sectionInfos) { 672 Section *normSect = &file.sections[si->normalizedSectionIndex]; 673 if (isZeroFillSection(si->type)) { 674 const uint8_t *empty = nullptr; 675 normSect->content = llvm::makeArrayRef(empty, si->size); 676 continue; 677 } 678 // Copy content from atoms to content buffer for section. 679 llvm::MutableArrayRef<uint8_t> sectionContent; 680 if (si->size) { 681 uint8_t *sectContent = file.ownedAllocations.Allocate<uint8_t>(si->size); 682 sectionContent = llvm::MutableArrayRef<uint8_t>(sectContent, si->size); 683 normSect->content = sectionContent; 684 } 685 for (AtomInfo &ai : si->atomsAndOffsets) { 686 if (!ai.atom->size()) { 687 assert(ai.atom->begin() == ai.atom->end() && 688 "Cannot have references without content"); 689 continue; 690 } 691 auto atomContent = sectionContent.slice(ai.offsetInSection, 692 ai.atom->size()); 693 _archHandler.generateAtomContent(*ai.atom, r, addrForAtom, 694 sectionAddrForAtom, _ctx.baseAddress(), 695 atomContent); 696 } 697 } 698} 699 700void Util::copySectionInfo(NormalizedFile &file) { 701 file.sections.reserve(_sectionInfos.size()); 702 // Write sections grouped by segment. 703 for (SegmentInfo *sgi : _segmentInfos) { 704 for (SectionInfo *si : sgi->sections) { 705 appendSection(si, file); 706 } 707 } 708} 709 710void Util::updateSectionInfo(NormalizedFile &file) { 711 file.sections.reserve(_sectionInfos.size()); 712 // sections grouped by segment. 713 for (SegmentInfo *sgi : _segmentInfos) { 714 Segment *normSeg = &file.segments[sgi->normalizedSegmentIndex]; 715 normSeg->address = sgi->address; 716 normSeg->size = sgi->size; 717 for (SectionInfo *si : sgi->sections) { 718 Section *normSect = &file.sections[si->normalizedSectionIndex]; 719 normSect->address = si->address; 720 } 721 } 722} 723 724void Util::copyEntryPointAddress(NormalizedFile &nFile) { 725 if (!_entryAtom) { 726 nFile.entryAddress = 0; 727 return; 728 } 729 730 if (_ctx.outputTypeHasEntry()) { 731 if (_archHandler.isThumbFunction(*_entryAtom)) 732 nFile.entryAddress = (_atomToAddress[_entryAtom] | 1); 733 else 734 nFile.entryAddress = _atomToAddress[_entryAtom]; 735 } 736} 737 738void Util::buildAtomToAddressMap() { 739 DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() 740 << "assign atom addresses:\n"); 741 const bool lookForEntry = _ctx.outputTypeHasEntry(); 742 for (SectionInfo *sect : _sectionInfos) { 743 for (const AtomInfo &info : sect->atomsAndOffsets) { 744 _atomToAddress[info.atom] = sect->address + info.offsetInSection; 745 if (lookForEntry && (info.atom->contentType() == DefinedAtom::typeCode) && 746 (info.atom->size() != 0) && 747 info.atom->name() == _ctx.entrySymbolName()) { 748 _entryAtom = info.atom; 749 } 750 DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() 751 << " address=" 752 << llvm::format("0x%016X", _atomToAddress[info.atom]) 753 << llvm::format(" 0x%09lX", info.atom) 754 << ", file=#" 755 << info.atom->file().ordinal() 756 << ", atom=#" 757 << info.atom->ordinal() 758 << ", name=" 759 << info.atom->name() 760 << ", type=" 761 << info.atom->contentType() 762 << "\n"); 763 } 764 } 765 DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() 766 << "assign header alias atom addresses:\n"); 767 for (const Atom *atom : _machHeaderAliasAtoms) { 768 _atomToAddress[atom] = _ctx.baseAddress(); 769#ifndef NDEBUG 770 if (auto *definedAtom = dyn_cast<DefinedAtom>(atom)) { 771 DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() 772 << " address=" 773 << llvm::format("0x%016X", _atomToAddress[atom]) 774 << llvm::format(" 0x%09lX", atom) 775 << ", file=#" 776 << definedAtom->file().ordinal() 777 << ", atom=#" 778 << definedAtom->ordinal() 779 << ", name=" 780 << definedAtom->name() 781 << ", type=" 782 << definedAtom->contentType() 783 << "\n"); 784 } else { 785 DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() 786 << " address=" 787 << llvm::format("0x%016X", _atomToAddress[atom]) 788 << " atom=" << atom 789 << " name=" << atom->name() << "\n"); 790 } 791#endif 792 } 793} 794 795llvm::Error Util::synthesizeDebugNotes(NormalizedFile &file) { 796 797 // Bail out early if we don't need to generate a debug map. 798 if (_ctx.debugInfoMode() == MachOLinkingContext::DebugInfoMode::noDebugMap) 799 return llvm::Error::success(); 800 801 std::vector<const DefinedAtom*> atomsNeedingDebugNotes; 802 std::set<const mach_o::MachOFile*> filesWithStabs; 803 bool objFileHasDwarf = false; 804 const File *objFile = nullptr; 805 806 for (SectionInfo *sect : _sectionInfos) { 807 for (const AtomInfo &info : sect->atomsAndOffsets) { 808 if (const DefinedAtom *atom = dyn_cast<DefinedAtom>(info.atom)) { 809 810 // FIXME: No stabs/debug-notes for symbols that wouldn't be in the 811 // symbol table. 812 // FIXME: No stabs/debug-notes for kernel dtrace probes. 813 814 if (atom->contentType() == DefinedAtom::typeCFI || 815 atom->contentType() == DefinedAtom::typeCString) 816 continue; 817 818 // Whenever we encounter a new file, update the 'objfileHasDwarf' flag. 819 if (&info.atom->file() != objFile) { 820 objFileHasDwarf = false; 821 if (const mach_o::MachOFile *atomFile = 822 dyn_cast<mach_o::MachOFile>(&info.atom->file())) { 823 if (atomFile->debugInfo()) { 824 if (isa<mach_o::DwarfDebugInfo>(atomFile->debugInfo())) 825 objFileHasDwarf = true; 826 else if (isa<mach_o::StabsDebugInfo>(atomFile->debugInfo())) 827 filesWithStabs.insert(atomFile); 828 } 829 } 830 } 831 832 // If this atom is from a file that needs dwarf, add it to the list. 833 if (objFileHasDwarf) 834 atomsNeedingDebugNotes.push_back(info.atom); 835 } 836 } 837 } 838 839 // Sort atoms needing debug notes by file ordinal, then atom ordinal. 840 std::sort(atomsNeedingDebugNotes.begin(), atomsNeedingDebugNotes.end(), 841 [](const DefinedAtom *lhs, const DefinedAtom *rhs) { 842 if (lhs->file().ordinal() != rhs->file().ordinal()) 843 return (lhs->file().ordinal() < rhs->file().ordinal()); 844 return (lhs->ordinal() < rhs->ordinal()); 845 }); 846 847 // FIXME: Handle <rdar://problem/17689030>: Add -add_ast_path option to \ 848 // linker which add N_AST stab entry to output 849 // See OutputFile::synthesizeDebugNotes in ObjectFile.cpp in ld64. 850 851 StringRef oldFileName = ""; 852 StringRef oldDirPath = ""; 853 bool wroteStartSO = false; 854 std::unordered_set<std::string> seenFiles; 855 for (const DefinedAtom *atom : atomsNeedingDebugNotes) { 856 const auto &atomFile = cast<mach_o::MachOFile>(atom->file()); 857 assert(dyn_cast_or_null<lld::mach_o::DwarfDebugInfo>(atomFile.debugInfo()) 858 && "file for atom needing debug notes does not contain dwarf"); 859 auto &dwarf = cast<lld::mach_o::DwarfDebugInfo>(*atomFile.debugInfo()); 860 861 auto &tu = dwarf.translationUnitSource(); 862 StringRef newFileName = tu.name; 863 StringRef newDirPath = tu.path; 864 865 // Add an SO whenever the TU source file changes. 866 if (newFileName != oldFileName || newDirPath != oldDirPath) { 867 // Translation unit change, emit ending SO 868 if (oldFileName != "") 869 _stabs.push_back(mach_o::Stab(nullptr, N_SO, 1, 0, 0, "")); 870 871 oldFileName = newFileName; 872 oldDirPath = newDirPath; 873 874 // If newDirPath doesn't end with a '/' we need to add one: 875 if (newDirPath.back() != '/') { 876 char *p = 877 file.ownedAllocations.Allocate<char>(newDirPath.size() + 2); 878 memcpy(p, newDirPath.data(), newDirPath.size()); 879 p[newDirPath.size()] = '/'; 880 p[newDirPath.size() + 1] = '\0'; 881 newDirPath = p; 882 } 883 884 // New translation unit, emit start SOs: 885 _stabs.push_back(mach_o::Stab(nullptr, N_SO, 0, 0, 0, newDirPath)); 886 _stabs.push_back(mach_o::Stab(nullptr, N_SO, 0, 0, 0, newFileName)); 887 888 // Synthesize OSO for start of file. 889 char *fullPath = nullptr; 890 { 891 SmallString<1024> pathBuf(atomFile.path()); 892 if (auto EC = llvm::sys::fs::make_absolute(pathBuf)) 893 return llvm::errorCodeToError(EC); 894 fullPath = file.ownedAllocations.Allocate<char>(pathBuf.size() + 1); 895 memcpy(fullPath, pathBuf.c_str(), pathBuf.size() + 1); 896 } 897 898 // Get mod time. 899 uint32_t modTime = 0; 900 llvm::sys::fs::file_status stat; 901 if (!llvm::sys::fs::status(fullPath, stat)) 902 if (llvm::sys::fs::exists(stat)) 903 modTime = llvm::sys::toTimeT(stat.getLastModificationTime()); 904 905 _stabs.push_back(mach_o::Stab(nullptr, N_OSO, _ctx.getCPUSubType(), 1, 906 modTime, fullPath)); 907 // <rdar://problem/6337329> linker should put cpusubtype in n_sect field 908 // of nlist entry for N_OSO debug note entries. 909 wroteStartSO = true; 910 } 911 912 if (atom->contentType() == DefinedAtom::typeCode) { 913 // Synthesize BNSYM and start FUN stabs. 914 _stabs.push_back(mach_o::Stab(atom, N_BNSYM, 1, 0, 0, "")); 915 _stabs.push_back(mach_o::Stab(atom, N_FUN, 1, 0, 0, atom->name())); 916 // Synthesize any SOL stabs needed 917 // FIXME: add SOL stabs. 918 _stabs.push_back(mach_o::Stab(nullptr, N_FUN, 0, 0, 919 atom->rawContent().size(), "")); 920 _stabs.push_back(mach_o::Stab(nullptr, N_ENSYM, 1, 0, 921 atom->rawContent().size(), "")); 922 } else { 923 if (atom->scope() == Atom::scopeTranslationUnit) 924 _stabs.push_back(mach_o::Stab(atom, N_STSYM, 1, 0, 0, atom->name())); 925 else 926 _stabs.push_back(mach_o::Stab(nullptr, N_GSYM, 1, 0, 0, atom->name())); 927 } 928 } 929 930 // Emit ending SO if necessary. 931 if (wroteStartSO) 932 _stabs.push_back(mach_o::Stab(nullptr, N_SO, 1, 0, 0, "")); 933 934 // Copy any stabs from .o file. 935 for (const auto *objFile : filesWithStabs) { 936 const auto &stabsList = 937 cast<mach_o::StabsDebugInfo>(objFile->debugInfo())->stabs(); 938 for (auto &stab : stabsList) { 939 // FIXME: Drop stabs whose atoms have been dead-stripped. 940 _stabs.push_back(stab); 941 } 942 } 943 944 return llvm::Error::success(); 945} 946 947uint16_t Util::descBits(const DefinedAtom* atom) { 948 uint16_t desc = 0; 949 switch (atom->merge()) { 950 case lld::DefinedAtom::mergeNo: 951 case lld::DefinedAtom::mergeAsTentative: 952 break; 953 case lld::DefinedAtom::mergeAsWeak: 954 case lld::DefinedAtom::mergeAsWeakAndAddressUsed: 955 desc |= N_WEAK_DEF; 956 break; 957 case lld::DefinedAtom::mergeSameNameAndSize: 958 case lld::DefinedAtom::mergeByLargestSection: 959 case lld::DefinedAtom::mergeByContent: 960 llvm_unreachable("Unsupported DefinedAtom::merge()"); 961 break; 962 } 963 if (atom->contentType() == lld::DefinedAtom::typeResolver) 964 desc |= N_SYMBOL_RESOLVER; 965 if (atom->contentType() == lld::DefinedAtom::typeMachHeader) 966 desc |= REFERENCED_DYNAMICALLY; 967 if (_archHandler.isThumbFunction(*atom)) 968 desc |= N_ARM_THUMB_DEF; 969 if (atom->deadStrip() == DefinedAtom::deadStripNever && 970 _ctx.outputMachOType() == llvm::MachO::MH_OBJECT) { 971 if ((atom->contentType() != DefinedAtom::typeInitializerPtr) 972 && (atom->contentType() != DefinedAtom::typeTerminatorPtr)) 973 desc |= N_NO_DEAD_STRIP; 974 } 975 return desc; 976} 977 978bool Util::AtomSorter::operator()(const AtomAndIndex &left, 979 const AtomAndIndex &right) { 980 return (left.atom->name().compare(right.atom->name()) < 0); 981} 982 983llvm::Error Util::getSymbolTableRegion(const DefinedAtom* atom, 984 bool &inGlobalsRegion, 985 SymbolScope &scope) { 986 bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); 987 switch (atom->scope()) { 988 case Atom::scopeTranslationUnit: 989 scope = 0; 990 inGlobalsRegion = false; 991 return llvm::Error::success(); 992 case Atom::scopeLinkageUnit: 993 if ((_ctx.exportMode() == MachOLinkingContext::ExportMode::whiteList) && 994 _ctx.exportSymbolNamed(atom->name())) { 995 return llvm::make_error<GenericError>( 996 Twine("cannot export hidden symbol ") + atom->name()); 997 } 998 if (rMode) { 999 if (_ctx.keepPrivateExterns()) { 1000 // -keep_private_externs means keep in globals region as N_PEXT. 1001 scope = N_PEXT | N_EXT; 1002 inGlobalsRegion = true; 1003 return llvm::Error::success(); 1004 } 1005 } 1006 // scopeLinkageUnit symbols are no longer global once linked. 1007 scope = N_PEXT; 1008 inGlobalsRegion = false; 1009 return llvm::Error::success(); 1010 case Atom::scopeGlobal: 1011 if (_ctx.exportRestrictMode()) { 1012 if (_ctx.exportSymbolNamed(atom->name())) { 1013 scope = N_EXT; 1014 inGlobalsRegion = true; 1015 return llvm::Error::success(); 1016 } else { 1017 scope = N_PEXT; 1018 inGlobalsRegion = false; 1019 return llvm::Error::success(); 1020 } 1021 } else { 1022 scope = N_EXT; 1023 inGlobalsRegion = true; 1024 return llvm::Error::success(); 1025 } 1026 break; 1027 } 1028 llvm_unreachable("atom->scope() unknown enum value"); 1029} 1030 1031 1032 1033llvm::Error Util::addSymbols(const lld::File &atomFile, 1034 NormalizedFile &file) { 1035 bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); 1036 // Mach-O symbol table has four regions: stabs, locals, globals, undefs. 1037 1038 // Add all stabs. 1039 for (auto &stab : _stabs) { 1040 Symbol sym; 1041 sym.type = static_cast<NListType>(stab.type); 1042 sym.scope = 0; 1043 sym.sect = stab.other; 1044 sym.desc = stab.desc; 1045 if (stab.atom) 1046 sym.value = _atomToAddress[stab.atom]; 1047 else 1048 sym.value = stab.value; 1049 sym.name = stab.str; 1050 file.stabsSymbols.push_back(sym); 1051 } 1052 1053 // Add all local (non-global) symbols in address order 1054 std::vector<AtomAndIndex> globals; 1055 globals.reserve(512); 1056 for (SectionInfo *sect : _sectionInfos) { 1057 for (const AtomInfo &info : sect->atomsAndOffsets) { 1058 const DefinedAtom *atom = info.atom; 1059 if (!atom->name().empty()) { 1060 SymbolScope symbolScope; 1061 bool inGlobalsRegion; 1062 if (auto ec = getSymbolTableRegion(atom, inGlobalsRegion, symbolScope)){ 1063 return ec; 1064 } 1065 if (inGlobalsRegion) { 1066 AtomAndIndex ai = { atom, sect->finalSectionIndex, symbolScope }; 1067 globals.push_back(ai); 1068 } else { 1069 Symbol sym; 1070 sym.name = atom->name(); 1071 sym.type = N_SECT; 1072 sym.scope = symbolScope; 1073 sym.sect = sect->finalSectionIndex; 1074 sym.desc = descBits(atom); 1075 sym.value = _atomToAddress[atom]; 1076 _atomToSymbolIndex[atom] = file.localSymbols.size(); 1077 file.localSymbols.push_back(sym); 1078 } 1079 } else if (rMode && _archHandler.needsLocalSymbolInRelocatableFile(atom)){ 1080 // Create 'Lxxx' labels for anonymous atoms if archHandler says so. 1081 static unsigned tempNum = 1; 1082 char tmpName[16]; 1083 sprintf(tmpName, "L%04u", tempNum++); 1084 StringRef tempRef(tmpName); 1085 Symbol sym; 1086 sym.name = tempRef.copy(file.ownedAllocations); 1087 sym.type = N_SECT; 1088 sym.scope = 0; 1089 sym.sect = sect->finalSectionIndex; 1090 sym.desc = 0; 1091 sym.value = _atomToAddress[atom]; 1092 _atomToSymbolIndex[atom] = file.localSymbols.size(); 1093 file.localSymbols.push_back(sym); 1094 } 1095 } 1096 } 1097 1098 // Sort global symbol alphabetically, then add to symbol table. 1099 std::sort(globals.begin(), globals.end(), AtomSorter()); 1100 const uint32_t globalStartIndex = file.localSymbols.size(); 1101 for (AtomAndIndex &ai : globals) { 1102 Symbol sym; 1103 sym.name = ai.atom->name(); 1104 sym.type = N_SECT; 1105 sym.scope = ai.scope; 1106 sym.sect = ai.index; 1107 sym.desc = descBits(static_cast<const DefinedAtom*>(ai.atom)); 1108 sym.value = _atomToAddress[ai.atom]; 1109 _atomToSymbolIndex[ai.atom] = globalStartIndex + file.globalSymbols.size(); 1110 file.globalSymbols.push_back(sym); 1111 } 1112 1113 // Sort undefined symbol alphabetically, then add to symbol table. 1114 std::vector<AtomAndIndex> undefs; 1115 undefs.reserve(128); 1116 for (const UndefinedAtom *atom : atomFile.undefined()) { 1117 AtomAndIndex ai = { atom, 0, N_EXT }; 1118 undefs.push_back(ai); 1119 } 1120 for (const SharedLibraryAtom *atom : atomFile.sharedLibrary()) { 1121 AtomAndIndex ai = { atom, 0, N_EXT }; 1122 undefs.push_back(ai); 1123 } 1124 std::sort(undefs.begin(), undefs.end(), AtomSorter()); 1125 const uint32_t start = file.globalSymbols.size() + file.localSymbols.size(); 1126 for (AtomAndIndex &ai : undefs) { 1127 Symbol sym; 1128 uint16_t desc = 0; 1129 if (!rMode) { 1130 uint8_t ordinal = 0; 1131 if (!_ctx.useFlatNamespace()) 1132 ordinal = dylibOrdinal(dyn_cast<SharedLibraryAtom>(ai.atom)); 1133 llvm::MachO::SET_LIBRARY_ORDINAL(desc, ordinal); 1134 } 1135 sym.name = ai.atom->name(); 1136 sym.type = N_UNDF; 1137 sym.scope = ai.scope; 1138 sym.sect = 0; 1139 sym.desc = desc; 1140 sym.value = 0; 1141 _atomToSymbolIndex[ai.atom] = file.undefinedSymbols.size() + start; 1142 file.undefinedSymbols.push_back(sym); 1143 } 1144 1145 return llvm::Error::success(); 1146} 1147 1148const Atom *Util::targetOfLazyPointer(const DefinedAtom *lpAtom) { 1149 for (const Reference *ref : *lpAtom) { 1150 if (_archHandler.isLazyPointer(*ref)) { 1151 return ref->target(); 1152 } 1153 } 1154 return nullptr; 1155} 1156 1157const Atom *Util::targetOfStub(const DefinedAtom *stubAtom) { 1158 for (const Reference *ref : *stubAtom) { 1159 if (const Atom *ta = ref->target()) { 1160 if (const DefinedAtom *lpAtom = dyn_cast<DefinedAtom>(ta)) { 1161 const Atom *target = targetOfLazyPointer(lpAtom); 1162 if (target) 1163 return target; 1164 } 1165 } 1166 } 1167 return nullptr; 1168} 1169 1170void Util::addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file) { 1171 for (SectionInfo *si : _sectionInfos) { 1172 Section &normSect = file.sections[si->normalizedSectionIndex]; 1173 switch (si->type) { 1174 case llvm::MachO::S_NON_LAZY_SYMBOL_POINTERS: 1175 for (const AtomInfo &info : si->atomsAndOffsets) { 1176 bool foundTarget = false; 1177 for (const Reference *ref : *info.atom) { 1178 const Atom *target = ref->target(); 1179 if (target) { 1180 if (isa<const SharedLibraryAtom>(target)) { 1181 uint32_t index = _atomToSymbolIndex[target]; 1182 normSect.indirectSymbols.push_back(index); 1183 foundTarget = true; 1184 } else { 1185 normSect.indirectSymbols.push_back( 1186 llvm::MachO::INDIRECT_SYMBOL_LOCAL); 1187 } 1188 } 1189 } 1190 if (!foundTarget) { 1191 normSect.indirectSymbols.push_back( 1192 llvm::MachO::INDIRECT_SYMBOL_ABS); 1193 } 1194 } 1195 break; 1196 case llvm::MachO::S_LAZY_SYMBOL_POINTERS: 1197 for (const AtomInfo &info : si->atomsAndOffsets) { 1198 const Atom *target = targetOfLazyPointer(info.atom); 1199 if (target) { 1200 uint32_t index = _atomToSymbolIndex[target]; 1201 normSect.indirectSymbols.push_back(index); 1202 } 1203 } 1204 break; 1205 case llvm::MachO::S_SYMBOL_STUBS: 1206 for (const AtomInfo &info : si->atomsAndOffsets) { 1207 const Atom *target = targetOfStub(info.atom); 1208 if (target) { 1209 uint32_t index = _atomToSymbolIndex[target]; 1210 normSect.indirectSymbols.push_back(index); 1211 } 1212 } 1213 break; 1214 default: 1215 break; 1216 } 1217 } 1218} 1219 1220void Util::addDependentDylibs(const lld::File &atomFile, 1221 NormalizedFile &nFile) { 1222 // Scan all imported symbols and build up list of dylibs they are from. 1223 int ordinal = 1; 1224 for (const auto *dylib : _ctx.allDylibs()) { 1225 DylibPathToInfo::iterator pos = _dylibInfo.find(dylib->installName()); 1226 if (pos == _dylibInfo.end()) { 1227 DylibInfo info; 1228 bool flatNamespaceAtom = dylib == _ctx.flatNamespaceFile(); 1229 1230 // If we're in -flat_namespace mode (or this atom came from the flat 1231 // namespace file under -undefined dynamic_lookup) then use the flat 1232 // lookup ordinal. 1233 if (flatNamespaceAtom || _ctx.useFlatNamespace()) 1234 info.ordinal = BIND_SPECIAL_DYLIB_FLAT_LOOKUP; 1235 else 1236 info.ordinal = ordinal++; 1237 info.hasWeak = false; 1238 info.hasNonWeak = !info.hasWeak; 1239 _dylibInfo[dylib->installName()] = info; 1240 1241 // Unless this was a flat_namespace atom, record the source dylib. 1242 if (!flatNamespaceAtom) { 1243 DependentDylib depInfo; 1244 depInfo.path = dylib->installName(); 1245 depInfo.kind = llvm::MachO::LC_LOAD_DYLIB; 1246 depInfo.currentVersion = _ctx.dylibCurrentVersion(dylib->path()); 1247 depInfo.compatVersion = _ctx.dylibCompatVersion(dylib->path()); 1248 nFile.dependentDylibs.push_back(depInfo); 1249 } 1250 } else { 1251 pos->second.hasWeak = false; 1252 pos->second.hasNonWeak = !pos->second.hasWeak; 1253 } 1254 } 1255 // Automatically weak link dylib in which all symbols are weak (canBeNull). 1256 for (DependentDylib &dep : nFile.dependentDylibs) { 1257 DylibInfo &info = _dylibInfo[dep.path]; 1258 if (info.hasWeak && !info.hasNonWeak) 1259 dep.kind = llvm::MachO::LC_LOAD_WEAK_DYLIB; 1260 else if (_ctx.isUpwardDylib(dep.path)) 1261 dep.kind = llvm::MachO::LC_LOAD_UPWARD_DYLIB; 1262 } 1263} 1264 1265int Util::dylibOrdinal(const SharedLibraryAtom *sa) { 1266 return _dylibInfo[sa->loadName()].ordinal; 1267} 1268 1269void Util::segIndexForSection(const SectionInfo *sect, uint8_t &segmentIndex, 1270 uint64_t &segmentStartAddr) { 1271 segmentIndex = 0; 1272 for (const SegmentInfo *seg : _segmentInfos) { 1273 if ((seg->address <= sect->address) 1274 && (seg->address+seg->size >= sect->address+sect->size)) { 1275 segmentStartAddr = seg->address; 1276 return; 1277 } 1278 ++segmentIndex; 1279 } 1280 llvm_unreachable("section not in any segment"); 1281} 1282 1283uint32_t Util::sectionIndexForAtom(const Atom *atom) { 1284 uint64_t address = _atomToAddress[atom]; 1285 for (const SectionInfo *si : _sectionInfos) { 1286 if ((si->address <= address) && (address < si->address+si->size)) 1287 return si->finalSectionIndex; 1288 } 1289 llvm_unreachable("atom not in any section"); 1290} 1291 1292void Util::addSectionRelocs(const lld::File &, NormalizedFile &file) { 1293 if (_ctx.outputMachOType() != llvm::MachO::MH_OBJECT) 1294 return; 1295 1296 // Utility function for ArchHandler to find symbol index for an atom. 1297 auto symIndexForAtom = [&] (const Atom &atom) -> uint32_t { 1298 auto pos = _atomToSymbolIndex.find(&atom); 1299 assert(pos != _atomToSymbolIndex.end()); 1300 return pos->second; 1301 }; 1302 1303 // Utility function for ArchHandler to find section index for an atom. 1304 auto sectIndexForAtom = [&] (const Atom &atom) -> uint32_t { 1305 return sectionIndexForAtom(&atom); 1306 }; 1307 1308 // Utility function for ArchHandler to find address of atom in output file. 1309 auto addressForAtom = [&] (const Atom &atom) -> uint64_t { 1310 auto pos = _atomToAddress.find(&atom); 1311 assert(pos != _atomToAddress.end()); 1312 return pos->second; 1313 }; 1314 1315 for (SectionInfo *si : _sectionInfos) { 1316 Section &normSect = file.sections[si->normalizedSectionIndex]; 1317 for (const AtomInfo &info : si->atomsAndOffsets) { 1318 const DefinedAtom *atom = info.atom; 1319 for (const Reference *ref : *atom) { 1320 // Skip emitting relocs for sections which are always able to be 1321 // implicitly regenerated and where the relocation targets an address 1322 // which is defined. 1323 if (si->relocsToDefinedCanBeImplicit && isa<DefinedAtom>(ref->target())) 1324 continue; 1325 _archHandler.appendSectionRelocations(*atom, info.offsetInSection, *ref, 1326 symIndexForAtom, 1327 sectIndexForAtom, 1328 addressForAtom, 1329 normSect.relocations); 1330 } 1331 } 1332 } 1333} 1334 1335void Util::addFunctionStarts(const lld::File &, NormalizedFile &file) { 1336 if (!_ctx.generateFunctionStartsLoadCommand()) 1337 return; 1338 file.functionStarts.reserve(8192); 1339 // Delta compress function starts, starting with the mach header symbol. 1340 const uint64_t badAddress = ~0ULL; 1341 uint64_t addr = badAddress; 1342 for (SectionInfo *si : _sectionInfos) { 1343 for (const AtomInfo &info : si->atomsAndOffsets) { 1344 auto type = info.atom->contentType(); 1345 if (type == DefinedAtom::typeMachHeader) { 1346 addr = _atomToAddress[info.atom]; 1347 continue; 1348 } 1349 if (type != DefinedAtom::typeCode) 1350 continue; 1351 assert(addr != badAddress && "Missing mach header symbol"); 1352 // Skip atoms which have 0 size. This is so that LC_FUNCTION_STARTS 1353 // can't spill in to the next section. 1354 if (!info.atom->size()) 1355 continue; 1356 uint64_t nextAddr = _atomToAddress[info.atom]; 1357 if (_archHandler.isThumbFunction(*info.atom)) 1358 nextAddr |= 1; 1359 uint64_t delta = nextAddr - addr; 1360 if (delta) { 1361 ByteBuffer buffer; 1362 buffer.append_uleb128(delta); 1363 file.functionStarts.insert(file.functionStarts.end(), buffer.bytes(), 1364 buffer.bytes() + buffer.size()); 1365 } 1366 addr = nextAddr; 1367 } 1368 } 1369 1370 // Null terminate, and pad to pointer size for this arch. 1371 file.functionStarts.push_back(0); 1372 1373 auto size = file.functionStarts.size(); 1374 for (unsigned i = size, e = llvm::alignTo(size, _ctx.is64Bit() ? 8 : 4); 1375 i != e; ++i) 1376 file.functionStarts.push_back(0); 1377} 1378 1379void Util::buildDataInCodeArray(const lld::File &, NormalizedFile &file) { 1380 if (!_ctx.generateDataInCodeLoadCommand()) 1381 return; 1382 for (SectionInfo *si : _sectionInfos) { 1383 for (const AtomInfo &info : si->atomsAndOffsets) { 1384 // Atoms that contain data-in-code have "transition" references 1385 // which mark a point where the embedded data starts of ends. 1386 // This needs to be converted to the mach-o format which is an array 1387 // of data-in-code ranges. 1388 uint32_t startOffset = 0; 1389 DataRegionType mode = DataRegionType(0); 1390 for (const Reference *ref : *info.atom) { 1391 if (ref->kindNamespace() != Reference::KindNamespace::mach_o) 1392 continue; 1393 if (_archHandler.isDataInCodeTransition(ref->kindValue())) { 1394 DataRegionType nextMode = (DataRegionType)ref->addend(); 1395 if (mode != nextMode) { 1396 if (mode != 0) { 1397 // Found end data range, so make range entry. 1398 DataInCode entry; 1399 entry.offset = si->address + info.offsetInSection + startOffset; 1400 entry.length = ref->offsetInAtom() - startOffset; 1401 entry.kind = mode; 1402 file.dataInCode.push_back(entry); 1403 } 1404 } 1405 mode = nextMode; 1406 startOffset = ref->offsetInAtom(); 1407 } 1408 } 1409 if (mode != 0) { 1410 // Function ends with data (no end transition). 1411 DataInCode entry; 1412 entry.offset = si->address + info.offsetInSection + startOffset; 1413 entry.length = info.atom->size() - startOffset; 1414 entry.kind = mode; 1415 file.dataInCode.push_back(entry); 1416 } 1417 } 1418 } 1419} 1420 1421void Util::addRebaseAndBindingInfo(const lld::File &atomFile, 1422 NormalizedFile &nFile) { 1423 if (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT) 1424 return; 1425 1426 uint8_t segmentIndex; 1427 uint64_t segmentStartAddr; 1428 uint32_t offsetInBindInfo = 0; 1429 1430 for (SectionInfo *sect : _sectionInfos) { 1431 segIndexForSection(sect, segmentIndex, segmentStartAddr); 1432 for (const AtomInfo &info : sect->atomsAndOffsets) { 1433 const DefinedAtom *atom = info.atom; 1434 for (const Reference *ref : *atom) { 1435 uint64_t segmentOffset = _atomToAddress[atom] + ref->offsetInAtom() 1436 - segmentStartAddr; 1437 const Atom* targ = ref->target(); 1438 if (_archHandler.isPointer(*ref)) { 1439 // A pointer to a DefinedAtom requires rebasing. 1440 if (isa<DefinedAtom>(targ)) { 1441 RebaseLocation rebase; 1442 rebase.segIndex = segmentIndex; 1443 rebase.segOffset = segmentOffset; 1444 rebase.kind = llvm::MachO::REBASE_TYPE_POINTER; 1445 nFile.rebasingInfo.push_back(rebase); 1446 } 1447 // A pointer to an SharedLibraryAtom requires binding. 1448 if (const SharedLibraryAtom *sa = dyn_cast<SharedLibraryAtom>(targ)) { 1449 BindLocation bind; 1450 bind.segIndex = segmentIndex; 1451 bind.segOffset = segmentOffset; 1452 bind.kind = llvm::MachO::BIND_TYPE_POINTER; 1453 bind.canBeNull = sa->canBeNullAtRuntime(); 1454 bind.ordinal = dylibOrdinal(sa); 1455 bind.symbolName = targ->name(); 1456 bind.addend = ref->addend(); 1457 nFile.bindingInfo.push_back(bind); 1458 } 1459 } 1460 else if (_archHandler.isLazyPointer(*ref)) { 1461 BindLocation bind; 1462 if (const SharedLibraryAtom *sa = dyn_cast<SharedLibraryAtom>(targ)) { 1463 bind.ordinal = dylibOrdinal(sa); 1464 } else { 1465 bind.ordinal = llvm::MachO::BIND_SPECIAL_DYLIB_SELF; 1466 } 1467 bind.segIndex = segmentIndex; 1468 bind.segOffset = segmentOffset; 1469 bind.kind = llvm::MachO::BIND_TYPE_POINTER; 1470 bind.canBeNull = false; //sa->canBeNullAtRuntime(); 1471 bind.symbolName = targ->name(); 1472 bind.addend = ref->addend(); 1473 nFile.lazyBindingInfo.push_back(bind); 1474 1475 // Now that we know the segmentOffset and the ordinal attribute, 1476 // we can fix the helper's code 1477 1478 fixLazyReferenceImm(atom, offsetInBindInfo, nFile); 1479 1480 // 5 bytes for opcodes + variable sizes (target name + \0 and offset 1481 // encode's size) 1482 offsetInBindInfo += 1483 6 + targ->name().size() + llvm::getULEB128Size(bind.segOffset); 1484 if (bind.ordinal > BIND_IMMEDIATE_MASK) 1485 offsetInBindInfo += llvm::getULEB128Size(bind.ordinal); 1486 } 1487 } 1488 } 1489 } 1490} 1491 1492void Util::fixLazyReferenceImm(const DefinedAtom *atom, uint32_t offset, 1493 NormalizedFile &file) { 1494 for (const Reference *ref : *atom) { 1495 const DefinedAtom *da = dyn_cast<DefinedAtom>(ref->target()); 1496 if (da == nullptr) 1497 return; 1498 1499 const Reference *helperRef = nullptr; 1500 for (const Reference *hr : *da) { 1501 if (hr->kindValue() == _archHandler.lazyImmediateLocationKind()) { 1502 helperRef = hr; 1503 break; 1504 } 1505 } 1506 if (helperRef == nullptr) 1507 continue; 1508 1509 // TODO: maybe get the fixed atom content from _archHandler ? 1510 for (SectionInfo *sectInfo : _sectionInfos) { 1511 for (const AtomInfo &atomInfo : sectInfo->atomsAndOffsets) { 1512 if (atomInfo.atom == helperRef->target()) { 1513 auto sectionContent = 1514 file.sections[sectInfo->normalizedSectionIndex].content; 1515 uint8_t *rawb = 1516 file.ownedAllocations.Allocate<uint8_t>(sectionContent.size()); 1517 llvm::MutableArrayRef<uint8_t> newContent{rawb, 1518 sectionContent.size()}; 1519 std::copy(sectionContent.begin(), sectionContent.end(), 1520 newContent.begin()); 1521 llvm::support::ulittle32_t *loc = 1522 reinterpret_cast<llvm::support::ulittle32_t *>( 1523 &newContent[atomInfo.offsetInSection + 1524 helperRef->offsetInAtom()]); 1525 *loc = offset; 1526 file.sections[sectInfo->normalizedSectionIndex].content = newContent; 1527 } 1528 } 1529 } 1530 } 1531} 1532 1533void Util::addExportInfo(const lld::File &atomFile, NormalizedFile &nFile) { 1534 if (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT) 1535 return; 1536 1537 for (SectionInfo *sect : _sectionInfos) { 1538 for (const AtomInfo &info : sect->atomsAndOffsets) { 1539 const DefinedAtom *atom = info.atom; 1540 if (atom->scope() != Atom::scopeGlobal) 1541 continue; 1542 if (_ctx.exportRestrictMode()) { 1543 if (!_ctx.exportSymbolNamed(atom->name())) 1544 continue; 1545 } 1546 Export exprt; 1547 exprt.name = atom->name(); 1548 exprt.offset = _atomToAddress[atom] - _ctx.baseAddress(); 1549 exprt.kind = EXPORT_SYMBOL_FLAGS_KIND_REGULAR; 1550 if (atom->merge() == DefinedAtom::mergeAsWeak) 1551 exprt.flags = EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION; 1552 else 1553 exprt.flags = 0; 1554 exprt.otherOffset = 0; 1555 exprt.otherName = StringRef(); 1556 nFile.exportInfo.push_back(exprt); 1557 } 1558 } 1559} 1560 1561uint32_t Util::fileFlags() { 1562 // FIXME: these need to determined at runtime. 1563 if (_ctx.outputMachOType() == MH_OBJECT) { 1564 return _subsectionsViaSymbols ? MH_SUBSECTIONS_VIA_SYMBOLS : 0; 1565 } else { 1566 uint32_t flags = MH_DYLDLINK; 1567 if (!_ctx.useFlatNamespace()) 1568 flags |= MH_TWOLEVEL | MH_NOUNDEFS; 1569 if ((_ctx.outputMachOType() == MH_EXECUTE) && _ctx.PIE()) 1570 flags |= MH_PIE; 1571 if (_hasTLVDescriptors) 1572 flags |= (MH_PIE | MH_HAS_TLV_DESCRIPTORS); 1573 return flags; 1574 } 1575} 1576 1577} // end anonymous namespace 1578 1579namespace lld { 1580namespace mach_o { 1581namespace normalized { 1582 1583/// Convert a set of Atoms into a normalized mach-o file. 1584llvm::Expected<std::unique_ptr<NormalizedFile>> 1585normalizedFromAtoms(const lld::File &atomFile, 1586 const MachOLinkingContext &context) { 1587 // The util object buffers info until the normalized file can be made. 1588 Util util(context); 1589 util.processDefinedAtoms(atomFile); 1590 util.organizeSections(); 1591 1592 std::unique_ptr<NormalizedFile> f(new NormalizedFile()); 1593 NormalizedFile &normFile = *f.get(); 1594 normFile.arch = context.arch(); 1595 normFile.fileType = context.outputMachOType(); 1596 normFile.flags = util.fileFlags(); 1597 normFile.stackSize = context.stackSize(); 1598 normFile.installName = context.installName(); 1599 normFile.currentVersion = context.currentVersion(); 1600 normFile.compatVersion = context.compatibilityVersion(); 1601 normFile.os = context.os(); 1602 1603 // If we are emitting an object file, then the min version is the maximum 1604 // of the min's of all the source files and the cmdline. 1605 if (normFile.fileType == llvm::MachO::MH_OBJECT) 1606 normFile.minOSverson = std::max(context.osMinVersion(), util.minVersion()); 1607 else 1608 normFile.minOSverson = context.osMinVersion(); 1609 1610 normFile.minOSVersionKind = util.minVersionCommandType(); 1611 1612 normFile.sdkVersion = context.sdkVersion(); 1613 normFile.sourceVersion = context.sourceVersion(); 1614 1615 if (context.generateVersionLoadCommand() && 1616 context.os() != MachOLinkingContext::OS::unknown) 1617 normFile.hasMinVersionLoadCommand = true; 1618 else if (normFile.fileType == llvm::MachO::MH_OBJECT && 1619 util.allSourceFilesHaveMinVersions() && 1620 ((normFile.os != MachOLinkingContext::OS::unknown) || 1621 util.minVersionCommandType())) { 1622 // If we emit an object file, then it should contain a min version load 1623 // command if all of the source files also contained min version commands. 1624 // Also, we either need to have a platform, or found a platform from the 1625 // source object files. 1626 normFile.hasMinVersionLoadCommand = true; 1627 } 1628 normFile.generateDataInCodeLoadCommand = 1629 context.generateDataInCodeLoadCommand(); 1630 normFile.pageSize = context.pageSize(); 1631 normFile.rpaths = context.rpaths(); 1632 util.addDependentDylibs(atomFile, normFile); 1633 util.copySegmentInfo(normFile); 1634 util.copySectionInfo(normFile); 1635 util.assignAddressesToSections(normFile); 1636 util.buildAtomToAddressMap(); 1637 if (auto err = util.synthesizeDebugNotes(normFile)) 1638 return std::move(err); 1639 util.updateSectionInfo(normFile); 1640 util.copySectionContent(normFile); 1641 if (auto ec = util.addSymbols(atomFile, normFile)) { 1642 return std::move(ec); 1643 } 1644 util.addIndirectSymbols(atomFile, normFile); 1645 util.addRebaseAndBindingInfo(atomFile, normFile); 1646 util.addExportInfo(atomFile, normFile); 1647 util.addSectionRelocs(atomFile, normFile); 1648 util.addFunctionStarts(atomFile, normFile); 1649 util.buildDataInCodeArray(atomFile, normFile); 1650 util.copyEntryPointAddress(normFile); 1651 1652 return std::move(f); 1653} 1654 1655} // namespace normalized 1656} // namespace mach_o 1657} // namespace lld 1658