1//===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9/// 10/// \file Converts from in-memory normalized mach-o to in-memory Atoms. 11/// 12/// +------------+ 13/// | normalized | 14/// +------------+ 15/// | 16/// | 17/// v 18/// +-------+ 19/// | Atoms | 20/// +-------+ 21 22#include "ArchHandler.h" 23#include "Atoms.h" 24#include "File.h" 25#include "MachONormalizedFile.h" 26#include "MachONormalizedFileBinaryUtils.h" 27#include "lld/Common/LLVM.h" 28#include "lld/Core/Error.h" 29#include "llvm/BinaryFormat/Dwarf.h" 30#include "llvm/BinaryFormat/MachO.h" 31#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 32#include "llvm/Support/DataExtractor.h" 33#include "llvm/Support/Debug.h" 34#include "llvm/Support/Error.h" 35#include "llvm/Support/Format.h" 36#include "llvm/Support/LEB128.h" 37#include "llvm/Support/raw_ostream.h" 38 39using namespace llvm::MachO; 40using namespace lld::mach_o::normalized; 41 42#define DEBUG_TYPE "normalized-file-to-atoms" 43 44namespace lld { 45namespace mach_o { 46 47 48namespace { // anonymous 49 50 51#define ENTRY(seg, sect, type, atomType) \ 52 {seg, sect, type, DefinedAtom::atomType } 53 54struct MachORelocatableSectionToAtomType { 55 StringRef segmentName; 56 StringRef sectionName; 57 SectionType sectionType; 58 DefinedAtom::ContentType atomType; 59}; 60 61const MachORelocatableSectionToAtomType sectsToAtomType[] = { 62 ENTRY("__TEXT", "__text", S_REGULAR, typeCode), 63 ENTRY("__TEXT", "__text", S_REGULAR, typeResolver), 64 ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString), 65 ENTRY("", "", S_CSTRING_LITERALS, typeCString), 66 ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String), 67 ENTRY("__TEXT", "__const", S_REGULAR, typeConstant), 68 ENTRY("__TEXT", "__const_coal", S_COALESCED, typeConstant), 69 ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI), 70 ENTRY("__TEXT", "__eh_frame", S_REGULAR, typeCFI), 71 ENTRY("__TEXT", "__literal4", S_4BYTE_LITERALS, typeLiteral4), 72 ENTRY("__TEXT", "__literal8", S_8BYTE_LITERALS, typeLiteral8), 73 ENTRY("__TEXT", "__literal16", S_16BYTE_LITERALS, typeLiteral16), 74 ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA), 75 ENTRY("__DATA", "__data", S_REGULAR, typeData), 76 ENTRY("__DATA", "__datacoal_nt", S_COALESCED, typeData), 77 ENTRY("__DATA", "__const", S_REGULAR, typeConstData), 78 ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString), 79 ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS, 80 typeInitializerPtr), 81 ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS, 82 typeTerminatorPtr), 83 ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS, 84 typeGOT), 85 ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill), 86 ENTRY("", "", S_NON_LAZY_SYMBOL_POINTERS, 87 typeGOT), 88 ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples), 89 ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES, 90 typeThunkTLV), 91 ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData), 92 ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL, 93 typeTLVInitialZeroFill), 94 ENTRY("__DATA", "__objc_imageinfo", S_REGULAR, typeObjCImageInfo), 95 ENTRY("__DATA", "__objc_catlist", S_REGULAR, typeObjC2CategoryList), 96 ENTRY("", "", S_INTERPOSING, typeInterposingTuples), 97 ENTRY("__LD", "__compact_unwind", S_REGULAR, 98 typeCompactUnwindInfo), 99 ENTRY("", "", S_REGULAR, typeUnknown) 100}; 101#undef ENTRY 102 103 104/// Figures out ContentType of a mach-o section. 105DefinedAtom::ContentType atomTypeFromSection(const Section §ion, 106 bool &customSectionName) { 107 // First look for match of name and type. Empty names in table are wildcards. 108 customSectionName = false; 109 for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; 110 p->atomType != DefinedAtom::typeUnknown; ++p) { 111 if (p->sectionType != section.type) 112 continue; 113 if (!p->segmentName.equals(section.segmentName) && !p->segmentName.empty()) 114 continue; 115 if (!p->sectionName.equals(section.sectionName) && !p->sectionName.empty()) 116 continue; 117 customSectionName = p->segmentName.empty() && p->sectionName.empty(); 118 return p->atomType; 119 } 120 // Look for code denoted by section attributes 121 if (section.attributes & S_ATTR_PURE_INSTRUCTIONS) 122 return DefinedAtom::typeCode; 123 124 return DefinedAtom::typeUnknown; 125} 126 127enum AtomizeModel { 128 atomizeAtSymbols, 129 atomizeFixedSize, 130 atomizePointerSize, 131 atomizeUTF8, 132 atomizeUTF16, 133 atomizeCFI, 134 atomizeCU, 135 atomizeCFString 136}; 137 138/// Returns info on how to atomize a section of the specified ContentType. 139void sectionParseInfo(DefinedAtom::ContentType atomType, 140 unsigned int &sizeMultiple, 141 DefinedAtom::Scope &scope, 142 DefinedAtom::Merge &merge, 143 AtomizeModel &atomizeModel) { 144 struct ParseInfo { 145 DefinedAtom::ContentType atomType; 146 unsigned int sizeMultiple; 147 DefinedAtom::Scope scope; 148 DefinedAtom::Merge merge; 149 AtomizeModel atomizeModel; 150 }; 151 152 #define ENTRY(type, size, scope, merge, model) \ 153 {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model } 154 155 static const ParseInfo parseInfo[] = { 156 ENTRY(typeCode, 1, scopeGlobal, mergeNo, 157 atomizeAtSymbols), 158 ENTRY(typeData, 1, scopeGlobal, mergeNo, 159 atomizeAtSymbols), 160 ENTRY(typeConstData, 1, scopeGlobal, mergeNo, 161 atomizeAtSymbols), 162 ENTRY(typeZeroFill, 1, scopeGlobal, mergeNo, 163 atomizeAtSymbols), 164 ENTRY(typeConstant, 1, scopeGlobal, mergeNo, 165 atomizeAtSymbols), 166 ENTRY(typeCString, 1, scopeLinkageUnit, mergeByContent, 167 atomizeUTF8), 168 ENTRY(typeUTF16String, 1, scopeLinkageUnit, mergeByContent, 169 atomizeUTF16), 170 ENTRY(typeCFI, 4, scopeTranslationUnit, mergeNo, 171 atomizeCFI), 172 ENTRY(typeLiteral4, 4, scopeLinkageUnit, mergeByContent, 173 atomizeFixedSize), 174 ENTRY(typeLiteral8, 8, scopeLinkageUnit, mergeByContent, 175 atomizeFixedSize), 176 ENTRY(typeLiteral16, 16, scopeLinkageUnit, mergeByContent, 177 atomizeFixedSize), 178 ENTRY(typeCFString, 4, scopeLinkageUnit, mergeByContent, 179 atomizeCFString), 180 ENTRY(typeInitializerPtr, 4, scopeTranslationUnit, mergeNo, 181 atomizePointerSize), 182 ENTRY(typeTerminatorPtr, 4, scopeTranslationUnit, mergeNo, 183 atomizePointerSize), 184 ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo, 185 atomizeCU), 186 ENTRY(typeGOT, 4, scopeLinkageUnit, mergeByContent, 187 atomizePointerSize), 188 ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent, 189 atomizePointerSize), 190 ENTRY(typeUnknown, 1, scopeGlobal, mergeNo, 191 atomizeAtSymbols) 192 }; 193 #undef ENTRY 194 const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo); 195 for (int i=0; i < tableLen; ++i) { 196 if (parseInfo[i].atomType == atomType) { 197 sizeMultiple = parseInfo[i].sizeMultiple; 198 scope = parseInfo[i].scope; 199 merge = parseInfo[i].merge; 200 atomizeModel = parseInfo[i].atomizeModel; 201 return; 202 } 203 } 204 205 // Unknown type is atomized by symbols. 206 sizeMultiple = 1; 207 scope = DefinedAtom::scopeGlobal; 208 merge = DefinedAtom::mergeNo; 209 atomizeModel = atomizeAtSymbols; 210} 211 212 213Atom::Scope atomScope(uint8_t scope) { 214 switch (scope) { 215 case N_EXT: 216 return Atom::scopeGlobal; 217 case N_PEXT: 218 case N_PEXT | N_EXT: 219 return Atom::scopeLinkageUnit; 220 case 0: 221 return Atom::scopeTranslationUnit; 222 } 223 llvm_unreachable("unknown scope value!"); 224} 225 226void appendSymbolsInSection(const std::vector<Symbol> &inSymbols, 227 uint32_t sectionIndex, 228 SmallVector<const Symbol *, 64> &outSyms) { 229 for (const Symbol &sym : inSymbols) { 230 // Only look at definition symbols. 231 if ((sym.type & N_TYPE) != N_SECT) 232 continue; 233 if (sym.sect != sectionIndex) 234 continue; 235 outSyms.push_back(&sym); 236 } 237} 238 239void atomFromSymbol(DefinedAtom::ContentType atomType, const Section §ion, 240 MachOFile &file, uint64_t symbolAddr, StringRef symbolName, 241 uint16_t symbolDescFlags, Atom::Scope symbolScope, 242 uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) { 243 // Mach-O symbol table does have size in it. Instead the size is the 244 // difference between this and the next symbol. 245 uint64_t size = nextSymbolAddr - symbolAddr; 246 uint64_t offset = symbolAddr - section.address; 247 bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable; 248 if (isZeroFillSection(section.type)) { 249 file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size, 250 noDeadStrip, copyRefs, §ion); 251 } else { 252 DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF) 253 ? DefinedAtom::mergeAsWeak : DefinedAtom::mergeNo; 254 bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF); 255 if (atomType == DefinedAtom::typeUnknown) { 256 // Mach-O needs a segment and section name. Concatenate those two 257 // with a / separator (e.g. "seg/sect") to fit into the lld model 258 // of just a section name. 259 std::string segSectName = section.segmentName.str() 260 + "/" + section.sectionName.str(); 261 file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType, 262 merge, thumb, noDeadStrip, offset, 263 size, segSectName, true, §ion); 264 } else { 265 if ((atomType == lld::DefinedAtom::typeCode) && 266 (symbolDescFlags & N_SYMBOL_RESOLVER)) { 267 atomType = lld::DefinedAtom::typeResolver; 268 } 269 file.addDefinedAtom(symbolName, symbolScope, atomType, merge, 270 offset, size, thumb, noDeadStrip, copyRefs, §ion); 271 } 272 } 273} 274 275llvm::Error processSymboledSection(DefinedAtom::ContentType atomType, 276 const Section §ion, 277 const NormalizedFile &normalizedFile, 278 MachOFile &file, bool scatterable, 279 bool copyRefs) { 280 // Find section's index. 281 uint32_t sectIndex = 1; 282 for (auto § : normalizedFile.sections) { 283 if (§ == §ion) 284 break; 285 ++sectIndex; 286 } 287 288 // Find all symbols in this section. 289 SmallVector<const Symbol *, 64> symbols; 290 appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols); 291 appendSymbolsInSection(normalizedFile.localSymbols, sectIndex, symbols); 292 293 // Sort symbols. 294 std::sort(symbols.begin(), symbols.end(), 295 [](const Symbol *lhs, const Symbol *rhs) -> bool { 296 if (lhs == rhs) 297 return false; 298 // First by address. 299 uint64_t lhsAddr = lhs->value; 300 uint64_t rhsAddr = rhs->value; 301 if (lhsAddr != rhsAddr) 302 return lhsAddr < rhsAddr; 303 // If same address, one is an alias so sort by scope. 304 Atom::Scope lScope = atomScope(lhs->scope); 305 Atom::Scope rScope = atomScope(rhs->scope); 306 if (lScope != rScope) 307 return lScope < rScope; 308 // If same address and scope, see if one might be better as 309 // the alias. 310 bool lPrivate = (lhs->name.front() == 'l'); 311 bool rPrivate = (rhs->name.front() == 'l'); 312 if (lPrivate != rPrivate) 313 return lPrivate; 314 // If same address and scope, sort by name. 315 return lhs->name < rhs->name; 316 }); 317 318 // Debug logging of symbols. 319 // for (const Symbol *sym : symbols) 320 // llvm::errs() << " sym: " 321 // << llvm::format("0x%08llx ", (uint64_t)sym->value) 322 // << ", " << sym->name << "\n"; 323 324 // If section has no symbols and no content, there are no atoms. 325 if (symbols.empty() && section.content.empty()) 326 return llvm::Error::success(); 327 328 if (symbols.empty()) { 329 // Section has no symbols, put all content in one anonymous atom. 330 atomFromSymbol(atomType, section, file, section.address, StringRef(), 331 0, Atom::scopeTranslationUnit, 332 section.address + section.content.size(), 333 scatterable, copyRefs); 334 } 335 else if (symbols.front()->value != section.address) { 336 // Section has anonymous content before first symbol. 337 atomFromSymbol(atomType, section, file, section.address, StringRef(), 338 0, Atom::scopeTranslationUnit, symbols.front()->value, 339 scatterable, copyRefs); 340 } 341 342 const Symbol *lastSym = nullptr; 343 for (const Symbol *sym : symbols) { 344 if (lastSym != nullptr) { 345 // Ignore any assembler added "ltmpNNN" symbol at start of section 346 // if there is another symbol at the start. 347 if ((lastSym->value != sym->value) 348 || lastSym->value != section.address 349 || !lastSym->name.startswith("ltmp")) { 350 atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, 351 lastSym->desc, atomScope(lastSym->scope), sym->value, 352 scatterable, copyRefs); 353 } 354 } 355 lastSym = sym; 356 } 357 if (lastSym != nullptr) { 358 atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, 359 lastSym->desc, atomScope(lastSym->scope), 360 section.address + section.content.size(), 361 scatterable, copyRefs); 362 } 363 364 // If object built without .subsections_via_symbols, add reference chain. 365 if (!scatterable) { 366 MachODefinedAtom *prevAtom = nullptr; 367 file.eachAtomInSection(section, 368 [&](MachODefinedAtom *atom, uint64_t offset)->void { 369 if (prevAtom) 370 prevAtom->addReference(Reference::KindNamespace::all, 371 Reference::KindArch::all, 372 Reference::kindLayoutAfter, 0, atom, 0); 373 prevAtom = atom; 374 }); 375 } 376 377 return llvm::Error::success(); 378} 379 380llvm::Error processSection(DefinedAtom::ContentType atomType, 381 const Section §ion, 382 bool customSectionName, 383 const NormalizedFile &normalizedFile, 384 MachOFile &file, bool scatterable, 385 bool copyRefs) { 386 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); 387 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); 388 389 // Get info on how to atomize section. 390 unsigned int sizeMultiple; 391 DefinedAtom::Scope scope; 392 DefinedAtom::Merge merge; 393 AtomizeModel atomizeModel; 394 sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel); 395 396 // Validate section size. 397 if ((section.content.size() % sizeMultiple) != 0) 398 return llvm::make_error<GenericError>(Twine("Section ") 399 + section.segmentName 400 + "/" + section.sectionName 401 + " has size (" 402 + Twine(section.content.size()) 403 + ") which is not a multiple of " 404 + Twine(sizeMultiple)); 405 406 if (atomizeModel == atomizeAtSymbols) { 407 // Break section up into atoms each with a fixed size. 408 return processSymboledSection(atomType, section, normalizedFile, file, 409 scatterable, copyRefs); 410 } else { 411 unsigned int size; 412 for (unsigned int offset = 0, e = section.content.size(); offset != e;) { 413 switch (atomizeModel) { 414 case atomizeFixedSize: 415 // Break section up into atoms each with a fixed size. 416 size = sizeMultiple; 417 break; 418 case atomizePointerSize: 419 // Break section up into atoms each the size of a pointer. 420 size = is64 ? 8 : 4; 421 break; 422 case atomizeUTF8: 423 // Break section up into zero terminated c-strings. 424 size = 0; 425 for (unsigned int i = offset; i < e; ++i) { 426 if (section.content[i] == 0) { 427 size = i + 1 - offset; 428 break; 429 } 430 } 431 break; 432 case atomizeUTF16: 433 // Break section up into zero terminated UTF16 strings. 434 size = 0; 435 for (unsigned int i = offset; i < e; i += 2) { 436 if ((section.content[i] == 0) && (section.content[i + 1] == 0)) { 437 size = i + 2 - offset; 438 break; 439 } 440 } 441 break; 442 case atomizeCFI: 443 // Break section up into dwarf unwind CFIs (FDE or CIE). 444 size = read32(§ion.content[offset], isBig) + 4; 445 if (offset+size > section.content.size()) { 446 return llvm::make_error<GenericError>(Twine("Section ") 447 + section.segmentName 448 + "/" + section.sectionName 449 + " is malformed. Size of CFI " 450 "starting at offset (" 451 + Twine(offset) 452 + ") is past end of section."); 453 } 454 break; 455 case atomizeCU: 456 // Break section up into compact unwind entries. 457 size = is64 ? 32 : 20; 458 break; 459 case atomizeCFString: 460 // Break section up into NS/CFString objects. 461 size = is64 ? 32 : 16; 462 break; 463 case atomizeAtSymbols: 464 break; 465 } 466 if (size == 0) { 467 return llvm::make_error<GenericError>(Twine("Section ") 468 + section.segmentName 469 + "/" + section.sectionName 470 + " is malformed. The last atom " 471 "is not zero terminated."); 472 } 473 if (customSectionName) { 474 // Mach-O needs a segment and section name. Concatenate those two 475 // with a / separator (e.g. "seg/sect") to fit into the lld model 476 // of just a section name. 477 std::string segSectName = section.segmentName.str() 478 + "/" + section.sectionName.str(); 479 file.addDefinedAtomInCustomSection(StringRef(), scope, atomType, 480 merge, false, false, offset, 481 size, segSectName, true, §ion); 482 } else { 483 file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size, 484 false, false, copyRefs, §ion); 485 } 486 offset += size; 487 } 488 } 489 return llvm::Error::success(); 490} 491 492const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile, 493 uint64_t address) { 494 for (const Section &s : normalizedFile.sections) { 495 uint64_t sAddr = s.address; 496 if ((sAddr <= address) && (address < sAddr+s.content.size())) { 497 return &s; 498 } 499 } 500 return nullptr; 501} 502 503const MachODefinedAtom * 504findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file, 505 uint64_t addr, Reference::Addend &addend) { 506 const Section *sect = nullptr; 507 sect = findSectionCoveringAddress(normalizedFile, addr); 508 if (!sect) 509 return nullptr; 510 511 uint32_t offsetInTarget; 512 uint64_t offsetInSect = addr - sect->address; 513 auto atom = 514 file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); 515 addend = offsetInTarget; 516 return atom; 517} 518 519// Walks all relocations for a section in a normalized .o file and 520// creates corresponding lld::Reference objects. 521llvm::Error convertRelocs(const Section §ion, 522 const NormalizedFile &normalizedFile, 523 bool scatterable, 524 MachOFile &file, 525 ArchHandler &handler) { 526 // Utility function for ArchHandler to find atom by its address. 527 auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr, 528 const lld::Atom **atom, Reference::Addend *addend) 529 -> llvm::Error { 530 if (sectIndex > normalizedFile.sections.size()) 531 return llvm::make_error<GenericError>(Twine("out of range section " 532 "index (") + Twine(sectIndex) + ")"); 533 const Section *sect = nullptr; 534 if (sectIndex == 0) { 535 sect = findSectionCoveringAddress(normalizedFile, addr); 536 if (!sect) 537 return llvm::make_error<GenericError>(Twine("address (" + Twine(addr) 538 + ") is not in any section")); 539 } else { 540 sect = &normalizedFile.sections[sectIndex-1]; 541 } 542 uint32_t offsetInTarget; 543 uint64_t offsetInSect = addr - sect->address; 544 *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); 545 *addend = offsetInTarget; 546 return llvm::Error::success(); 547 }; 548 549 // Utility function for ArchHandler to find atom by its symbol index. 550 auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result) 551 -> llvm::Error { 552 // Find symbol from index. 553 const Symbol *sym = nullptr; 554 uint32_t numStabs = normalizedFile.stabsSymbols.size(); 555 uint32_t numLocal = normalizedFile.localSymbols.size(); 556 uint32_t numGlobal = normalizedFile.globalSymbols.size(); 557 uint32_t numUndef = normalizedFile.undefinedSymbols.size(); 558 assert(symbolIndex >= numStabs && "Searched for stab via atomBySymbol?"); 559 if (symbolIndex < numStabs+numLocal) { 560 sym = &normalizedFile.localSymbols[symbolIndex-numStabs]; 561 } else if (symbolIndex < numStabs+numLocal+numGlobal) { 562 sym = &normalizedFile.globalSymbols[symbolIndex-numStabs-numLocal]; 563 } else if (symbolIndex < numStabs+numLocal+numGlobal+numUndef) { 564 sym = &normalizedFile.undefinedSymbols[symbolIndex-numStabs-numLocal- 565 numGlobal]; 566 } else { 567 return llvm::make_error<GenericError>(Twine("symbol index (") 568 + Twine(symbolIndex) + ") out of range"); 569 } 570 571 // Find atom from symbol. 572 if ((sym->type & N_TYPE) == N_SECT) { 573 if (sym->sect > normalizedFile.sections.size()) 574 return llvm::make_error<GenericError>(Twine("symbol section index (") 575 + Twine(sym->sect) + ") out of range "); 576 const Section &symSection = normalizedFile.sections[sym->sect-1]; 577 uint64_t targetOffsetInSect = sym->value - symSection.address; 578 MachODefinedAtom *target = file.findAtomCoveringAddress(symSection, 579 targetOffsetInSect); 580 if (target) { 581 *result = target; 582 return llvm::Error::success(); 583 } 584 return llvm::make_error<GenericError>("no atom found for defined symbol"); 585 } else if ((sym->type & N_TYPE) == N_UNDF) { 586 const lld::Atom *target = file.findUndefAtom(sym->name); 587 if (target) { 588 *result = target; 589 return llvm::Error::success(); 590 } 591 return llvm::make_error<GenericError>("no undefined atom found for sym"); 592 } else { 593 // Search undefs 594 return llvm::make_error<GenericError>("no atom found for symbol"); 595 } 596 }; 597 598 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); 599 // Use old-school iterator so that paired relocations can be grouped. 600 for (auto it=section.relocations.begin(), e=section.relocations.end(); 601 it != e; ++it) { 602 const Relocation &reloc = *it; 603 // Find atom this relocation is in. 604 if (reloc.offset > section.content.size()) 605 return llvm::make_error<GenericError>( 606 Twine("r_address (") + Twine(reloc.offset) 607 + ") is larger than section size (" 608 + Twine(section.content.size()) + ")"); 609 uint32_t offsetInAtom; 610 MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section, 611 reloc.offset, 612 &offsetInAtom); 613 assert(inAtom && "r_address in range, should have found atom"); 614 uint64_t fixupAddress = section.address + reloc.offset; 615 616 const lld::Atom *target = nullptr; 617 Reference::Addend addend = 0; 618 Reference::KindValue kind; 619 if (handler.isPairedReloc(reloc)) { 620 // Handle paired relocations together. 621 const Relocation &reloc2 = *++it; 622 auto relocErr = handler.getPairReferenceInfo( 623 reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable, 624 atomByAddr, atomBySymbol, &kind, &target, &addend); 625 if (relocErr) { 626 return handleErrors(std::move(relocErr), 627 [&](std::unique_ptr<GenericError> GE) { 628 return llvm::make_error<GenericError>( 629 Twine("bad relocation (") + GE->getMessage() 630 + ") in section " 631 + section.segmentName + "/" + section.sectionName 632 + " (r1_address=" + Twine::utohexstr(reloc.offset) 633 + ", r1_type=" + Twine(reloc.type) 634 + ", r1_extern=" + Twine(reloc.isExtern) 635 + ", r1_length=" + Twine((int)reloc.length) 636 + ", r1_pcrel=" + Twine(reloc.pcRel) 637 + (!reloc.scattered ? (Twine(", r1_symbolnum=") 638 + Twine(reloc.symbol)) 639 : (Twine(", r1_scattered=1, r1_value=") 640 + Twine(reloc.value))) 641 + ")" 642 + ", (r2_address=" + Twine::utohexstr(reloc2.offset) 643 + ", r2_type=" + Twine(reloc2.type) 644 + ", r2_extern=" + Twine(reloc2.isExtern) 645 + ", r2_length=" + Twine((int)reloc2.length) 646 + ", r2_pcrel=" + Twine(reloc2.pcRel) 647 + (!reloc2.scattered ? (Twine(", r2_symbolnum=") 648 + Twine(reloc2.symbol)) 649 : (Twine(", r2_scattered=1, r2_value=") 650 + Twine(reloc2.value))) 651 + ")" ); 652 }); 653 } 654 } 655 else { 656 // Use ArchHandler to convert relocation record into information 657 // needed to instantiate an lld::Reference object. 658 auto relocErr = handler.getReferenceInfo( 659 reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr, 660 atomBySymbol, &kind, &target, &addend); 661 if (relocErr) { 662 return handleErrors(std::move(relocErr), 663 [&](std::unique_ptr<GenericError> GE) { 664 return llvm::make_error<GenericError>( 665 Twine("bad relocation (") + GE->getMessage() 666 + ") in section " 667 + section.segmentName + "/" + section.sectionName 668 + " (r_address=" + Twine::utohexstr(reloc.offset) 669 + ", r_type=" + Twine(reloc.type) 670 + ", r_extern=" + Twine(reloc.isExtern) 671 + ", r_length=" + Twine((int)reloc.length) 672 + ", r_pcrel=" + Twine(reloc.pcRel) 673 + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol)) 674 : (Twine(", r_scattered=1, r_value=") 675 + Twine(reloc.value))) 676 + ")" ); 677 }); 678 } 679 } 680 // Instantiate an lld::Reference object and add to its atom. 681 inAtom->addReference(Reference::KindNamespace::mach_o, 682 handler.kindArch(), 683 kind, offsetInAtom, target, addend); 684 } 685 686 return llvm::Error::success(); 687} 688 689bool isDebugInfoSection(const Section §ion) { 690 if ((section.attributes & S_ATTR_DEBUG) == 0) 691 return false; 692 return section.segmentName.equals("__DWARF"); 693} 694 695static const Atom* findDefinedAtomByName(MachOFile &file, Twine name) { 696 std::string strName = name.str(); 697 for (auto *atom : file.defined()) 698 if (atom->name() == strName) 699 return atom; 700 return nullptr; 701} 702 703static StringRef copyDebugString(StringRef str, BumpPtrAllocator &alloc) { 704 char *strCopy = alloc.Allocate<char>(str.size() + 1); 705 memcpy(strCopy, str.data(), str.size()); 706 strCopy[str.size()] = '\0'; 707 return strCopy; 708} 709 710llvm::Error parseStabs(MachOFile &file, 711 const NormalizedFile &normalizedFile, 712 bool copyRefs) { 713 714 if (normalizedFile.stabsSymbols.empty()) 715 return llvm::Error::success(); 716 717 // FIXME: Kill this off when we can move to sane yaml parsing. 718 std::unique_ptr<BumpPtrAllocator> allocator; 719 if (copyRefs) 720 allocator = std::make_unique<BumpPtrAllocator>(); 721 722 enum { start, inBeginEnd } state = start; 723 724 const Atom *currentAtom = nullptr; 725 uint64_t currentAtomAddress = 0; 726 StabsDebugInfo::StabsList stabsList; 727 for (const auto &stabSym : normalizedFile.stabsSymbols) { 728 Stab stab(nullptr, stabSym.type, stabSym.sect, stabSym.desc, 729 stabSym.value, stabSym.name); 730 switch (state) { 731 case start: 732 switch (static_cast<StabType>(stabSym.type)) { 733 case N_BNSYM: 734 state = inBeginEnd; 735 currentAtomAddress = stabSym.value; 736 Reference::Addend addend; 737 currentAtom = findAtomCoveringAddress(normalizedFile, file, 738 currentAtomAddress, addend); 739 if (addend != 0) 740 return llvm::make_error<GenericError>( 741 "Non-zero addend for BNSYM '" + stabSym.name + "' in " + 742 file.path()); 743 if (currentAtom) 744 stab.atom = currentAtom; 745 else { 746 // FIXME: ld64 just issues a warning here - should we match that? 747 return llvm::make_error<GenericError>( 748 "can't find atom for stabs BNSYM at " + 749 Twine::utohexstr(stabSym.value) + " in " + file.path()); 750 } 751 break; 752 case N_SO: 753 case N_OSO: 754 // Not associated with an atom, just copy. 755 if (copyRefs) 756 stab.str = copyDebugString(stabSym.name, *allocator); 757 else 758 stab.str = stabSym.name; 759 break; 760 case N_GSYM: { 761 auto colonIdx = stabSym.name.find(':'); 762 if (colonIdx != StringRef::npos) { 763 StringRef name = stabSym.name.substr(0, colonIdx); 764 currentAtom = findDefinedAtomByName(file, "_" + name); 765 stab.atom = currentAtom; 766 if (copyRefs) 767 stab.str = copyDebugString(stabSym.name, *allocator); 768 else 769 stab.str = stabSym.name; 770 } else { 771 currentAtom = findDefinedAtomByName(file, stabSym.name); 772 stab.atom = currentAtom; 773 if (copyRefs) 774 stab.str = copyDebugString(stabSym.name, *allocator); 775 else 776 stab.str = stabSym.name; 777 } 778 if (stab.atom == nullptr) 779 return llvm::make_error<GenericError>( 780 "can't find atom for N_GSYM stabs" + stabSym.name + 781 " in " + file.path()); 782 break; 783 } 784 case N_FUN: 785 return llvm::make_error<GenericError>( 786 "old-style N_FUN stab '" + stabSym.name + "' unsupported"); 787 default: 788 return llvm::make_error<GenericError>( 789 "unrecognized stab symbol '" + stabSym.name + "'"); 790 } 791 break; 792 case inBeginEnd: 793 stab.atom = currentAtom; 794 switch (static_cast<StabType>(stabSym.type)) { 795 case N_ENSYM: 796 state = start; 797 currentAtom = nullptr; 798 break; 799 case N_FUN: 800 // Just copy the string. 801 if (copyRefs) 802 stab.str = copyDebugString(stabSym.name, *allocator); 803 else 804 stab.str = stabSym.name; 805 break; 806 default: 807 return llvm::make_error<GenericError>( 808 "unrecognized stab symbol '" + stabSym.name + "'"); 809 } 810 } 811 llvm::dbgs() << "Adding to stabsList: " << stab << "\n"; 812 stabsList.push_back(stab); 813 } 814 815 file.setDebugInfo(std::make_unique<StabsDebugInfo>(std::move(stabsList))); 816 817 // FIXME: Kill this off when we fix YAML memory ownership. 818 file.debugInfo()->setAllocator(std::move(allocator)); 819 820 return llvm::Error::success(); 821} 822 823static llvm::DataExtractor 824dataExtractorFromSection(const NormalizedFile &normalizedFile, 825 const Section &S) { 826 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); 827 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); 828 StringRef SecData(reinterpret_cast<const char*>(S.content.data()), 829 S.content.size()); 830 return llvm::DataExtractor(SecData, !isBig, is64 ? 8 : 4); 831} 832 833// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE 834// inspection" code if possible. 835static uint64_t getCUAbbrevOffset(llvm::DataExtractor abbrevData, 836 uint64_t abbrCode) { 837 uint64_t curCode; 838 uint64_t offset = 0; 839 while ((curCode = abbrevData.getULEB128(&offset)) != abbrCode) { 840 // Tag 841 abbrevData.getULEB128(&offset); 842 // DW_CHILDREN 843 abbrevData.getU8(&offset); 844 // Attributes 845 while (abbrevData.getULEB128(&offset) | abbrevData.getULEB128(&offset)) 846 ; 847 } 848 return offset; 849} 850 851// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE 852// inspection" code if possible. 853static Expected<const char *> 854getIndexedString(const NormalizedFile &normalizedFile, 855 llvm::dwarf::Form form, llvm::DataExtractor infoData, 856 uint64_t &infoOffset, const Section &stringsSection) { 857 if (form == llvm::dwarf::DW_FORM_string) 858 return infoData.getCStr(&infoOffset); 859 if (form != llvm::dwarf::DW_FORM_strp) 860 return llvm::make_error<GenericError>( 861 "string field encoded without DW_FORM_strp"); 862 uint64_t stringOffset = infoData.getU32(&infoOffset); 863 llvm::DataExtractor stringsData = 864 dataExtractorFromSection(normalizedFile, stringsSection); 865 return stringsData.getCStr(&stringOffset); 866} 867 868// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE 869// inspection" code if possible. 870static llvm::Expected<TranslationUnitSource> 871readCompUnit(const NormalizedFile &normalizedFile, 872 const Section &info, 873 const Section &abbrev, 874 const Section &strings, 875 StringRef path) { 876 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE 877 // inspection" code if possible. 878 uint64_t offset = 0; 879 llvm::dwarf::DwarfFormat Format = llvm::dwarf::DwarfFormat::DWARF32; 880 auto infoData = dataExtractorFromSection(normalizedFile, info); 881 uint32_t length = infoData.getU32(&offset); 882 if (length == llvm::dwarf::DW_LENGTH_DWARF64) { 883 Format = llvm::dwarf::DwarfFormat::DWARF64; 884 infoData.getU64(&offset); 885 } 886 else if (length >= llvm::dwarf::DW_LENGTH_lo_reserved) 887 return llvm::make_error<GenericError>("Malformed DWARF in " + path); 888 889 uint16_t version = infoData.getU16(&offset); 890 891 if (version < 2 || version > 4) 892 return llvm::make_error<GenericError>("Unsupported DWARF version in " + 893 path); 894 895 infoData.getU32(&offset); // Abbrev offset (should be zero) 896 uint8_t addrSize = infoData.getU8(&offset); 897 898 uint32_t abbrCode = infoData.getULEB128(&offset); 899 auto abbrevData = dataExtractorFromSection(normalizedFile, abbrev); 900 uint64_t abbrevOffset = getCUAbbrevOffset(abbrevData, abbrCode); 901 uint64_t tag = abbrevData.getULEB128(&abbrevOffset); 902 if (tag != llvm::dwarf::DW_TAG_compile_unit) 903 return llvm::make_error<GenericError>("top level DIE is not a compile unit"); 904 // DW_CHILDREN 905 abbrevData.getU8(&abbrevOffset); 906 uint32_t name; 907 llvm::dwarf::Form form; 908 llvm::dwarf::FormParams formParams = {version, addrSize, Format}; 909 TranslationUnitSource tu; 910 while ((name = abbrevData.getULEB128(&abbrevOffset)) | 911 (form = static_cast<llvm::dwarf::Form>( 912 abbrevData.getULEB128(&abbrevOffset))) && 913 (name != 0 || form != 0)) { 914 switch (name) { 915 case llvm::dwarf::DW_AT_name: { 916 if (auto eName = getIndexedString(normalizedFile, form, infoData, offset, 917 strings)) 918 tu.name = *eName; 919 else 920 return eName.takeError(); 921 break; 922 } 923 case llvm::dwarf::DW_AT_comp_dir: { 924 if (auto eName = getIndexedString(normalizedFile, form, infoData, offset, 925 strings)) 926 tu.path = *eName; 927 else 928 return eName.takeError(); 929 break; 930 } 931 default: 932 llvm::DWARFFormValue::skipValue(form, infoData, &offset, formParams); 933 } 934 } 935 return tu; 936} 937 938llvm::Error parseDebugInfo(MachOFile &file, 939 const NormalizedFile &normalizedFile, bool copyRefs) { 940 941 // Find the interesting debug info sections. 942 const Section *debugInfo = nullptr; 943 const Section *debugAbbrev = nullptr; 944 const Section *debugStrings = nullptr; 945 946 for (auto &s : normalizedFile.sections) { 947 if (s.segmentName == "__DWARF") { 948 if (s.sectionName == "__debug_info") 949 debugInfo = &s; 950 else if (s.sectionName == "__debug_abbrev") 951 debugAbbrev = &s; 952 else if (s.sectionName == "__debug_str") 953 debugStrings = &s; 954 } 955 } 956 957 if (!debugInfo) 958 return parseStabs(file, normalizedFile, copyRefs); 959 960 if (debugInfo->content.size() == 0) 961 return llvm::Error::success(); 962 963 if (debugInfo->content.size() < 12) 964 return llvm::make_error<GenericError>("Malformed __debug_info section in " + 965 file.path() + ": too small"); 966 967 if (!debugAbbrev) 968 return llvm::make_error<GenericError>("Missing __dwarf_abbrev section in " + 969 file.path()); 970 971 if (auto tuOrErr = readCompUnit(normalizedFile, *debugInfo, *debugAbbrev, 972 *debugStrings, file.path())) { 973 // FIXME: Kill of allocator and code under 'copyRefs' when we fix YAML 974 // memory ownership. 975 std::unique_ptr<BumpPtrAllocator> allocator; 976 if (copyRefs) { 977 allocator = std::make_unique<BumpPtrAllocator>(); 978 tuOrErr->name = copyDebugString(tuOrErr->name, *allocator); 979 tuOrErr->path = copyDebugString(tuOrErr->path, *allocator); 980 } 981 file.setDebugInfo(std::make_unique<DwarfDebugInfo>(std::move(*tuOrErr))); 982 if (copyRefs) 983 file.debugInfo()->setAllocator(std::move(allocator)); 984 } else 985 return tuOrErr.takeError(); 986 987 return llvm::Error::success(); 988} 989 990static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) { 991 if (is64) 992 return read64(addr, isBig); 993 994 int32_t res = read32(addr, isBig); 995 return res; 996} 997 998/// --- Augmentation String Processing --- 999 1000struct CIEInfo { 1001 bool _augmentationDataPresent = false; 1002 bool _mayHaveEH = false; 1003 uint32_t _offsetOfLSDA = ~0U; 1004 uint32_t _offsetOfPersonality = ~0U; 1005 uint32_t _offsetOfFDEPointerEncoding = ~0U; 1006 uint32_t _augmentationDataLength = ~0U; 1007}; 1008 1009typedef llvm::DenseMap<const MachODefinedAtom*, CIEInfo> CIEInfoMap; 1010 1011static llvm::Error processAugmentationString(const uint8_t *augStr, 1012 CIEInfo &cieInfo, 1013 unsigned &len) { 1014 1015 if (augStr[0] == '\0') { 1016 len = 1; 1017 return llvm::Error::success(); 1018 } 1019 1020 if (augStr[0] != 'z') 1021 return llvm::make_error<GenericError>("expected 'z' at start of " 1022 "augmentation string"); 1023 1024 cieInfo._augmentationDataPresent = true; 1025 uint64_t idx = 1; 1026 1027 uint32_t offsetInAugmentationData = 0; 1028 while (augStr[idx] != '\0') { 1029 if (augStr[idx] == 'L') { 1030 cieInfo._offsetOfLSDA = offsetInAugmentationData; 1031 // This adds a single byte to the augmentation data. 1032 ++offsetInAugmentationData; 1033 ++idx; 1034 continue; 1035 } 1036 if (augStr[idx] == 'P') { 1037 cieInfo._offsetOfPersonality = offsetInAugmentationData; 1038 // This adds a single byte to the augmentation data for the encoding, 1039 // then a number of bytes for the pointer data. 1040 // FIXME: We are assuming 4 is correct here for the pointer size as we 1041 // always currently use delta32ToGOT. 1042 offsetInAugmentationData += 5; 1043 ++idx; 1044 continue; 1045 } 1046 if (augStr[idx] == 'R') { 1047 cieInfo._offsetOfFDEPointerEncoding = offsetInAugmentationData; 1048 // This adds a single byte to the augmentation data. 1049 ++offsetInAugmentationData; 1050 ++idx; 1051 continue; 1052 } 1053 if (augStr[idx] == 'e') { 1054 if (augStr[idx + 1] != 'h') 1055 return llvm::make_error<GenericError>("expected 'eh' in " 1056 "augmentation string"); 1057 cieInfo._mayHaveEH = true; 1058 idx += 2; 1059 continue; 1060 } 1061 ++idx; 1062 } 1063 1064 cieInfo._augmentationDataLength = offsetInAugmentationData; 1065 1066 len = idx + 1; 1067 return llvm::Error::success(); 1068} 1069 1070static llvm::Error processCIE(const NormalizedFile &normalizedFile, 1071 MachOFile &file, 1072 mach_o::ArchHandler &handler, 1073 const Section *ehFrameSection, 1074 MachODefinedAtom *atom, 1075 uint64_t offset, 1076 CIEInfoMap &cieInfos) { 1077 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); 1078 const uint8_t *frameData = atom->rawContent().data(); 1079 1080 CIEInfo cieInfo; 1081 1082 uint32_t size = read32(frameData, isBig); 1083 uint64_t cieIDField = size == 0xffffffffU 1084 ? sizeof(uint32_t) + sizeof(uint64_t) 1085 : sizeof(uint32_t); 1086 uint64_t versionField = cieIDField + sizeof(uint32_t); 1087 uint64_t augmentationStringField = versionField + sizeof(uint8_t); 1088 1089 unsigned augmentationStringLength = 0; 1090 if (auto err = processAugmentationString(frameData + augmentationStringField, 1091 cieInfo, augmentationStringLength)) 1092 return err; 1093 1094 if (cieInfo._offsetOfPersonality != ~0U) { 1095 // If we have augmentation data for the personality function, then we may 1096 // need to implicitly generate its relocation. 1097 1098 // Parse the EH Data field which is pointer sized. 1099 uint64_t EHDataField = augmentationStringField + augmentationStringLength; 1100 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); 1101 unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0); 1102 1103 // Parse Code Align Factor which is a ULEB128. 1104 uint64_t CodeAlignField = EHDataField + EHDataFieldSize; 1105 unsigned lengthFieldSize = 0; 1106 llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize); 1107 1108 // Parse Data Align Factor which is a SLEB128. 1109 uint64_t DataAlignField = CodeAlignField + lengthFieldSize; 1110 llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize); 1111 1112 // Parse Return Address Register which is a byte. 1113 uint64_t ReturnAddressField = DataAlignField + lengthFieldSize; 1114 1115 // Parse the augmentation length which is a ULEB128. 1116 uint64_t AugmentationLengthField = ReturnAddressField + 1; 1117 uint64_t AugmentationLength = 1118 llvm::decodeULEB128(frameData + AugmentationLengthField, 1119 &lengthFieldSize); 1120 1121 if (AugmentationLength != cieInfo._augmentationDataLength) 1122 return llvm::make_error<GenericError>("CIE augmentation data length " 1123 "mismatch"); 1124 1125 // Get the start address of the augmentation data. 1126 uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize; 1127 1128 // Parse the personality function from the augmentation data. 1129 uint64_t PersonalityField = 1130 AugmentationDataField + cieInfo._offsetOfPersonality; 1131 1132 // Parse the personality encoding. 1133 // FIXME: Verify that this is a 32-bit pcrel offset. 1134 uint64_t PersonalityFunctionField = PersonalityField + 1; 1135 1136 if (atom->begin() != atom->end()) { 1137 // If we have an explicit relocation, then make sure it matches this 1138 // offset as this is where we'd expect it to be applied to. 1139 DefinedAtom::reference_iterator CurrentRef = atom->begin(); 1140 if (CurrentRef->offsetInAtom() != PersonalityFunctionField) 1141 return llvm::make_error<GenericError>("CIE personality reloc at " 1142 "wrong offset"); 1143 1144 if (++CurrentRef != atom->end()) 1145 return llvm::make_error<GenericError>("CIE contains too many relocs"); 1146 } else { 1147 // Implicitly generate the personality function reloc. It's assumed to 1148 // be a delta32 offset to a GOT entry. 1149 // FIXME: Parse the encoding and check this. 1150 int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig); 1151 uint64_t funcAddress = ehFrameSection->address + offset + 1152 PersonalityFunctionField; 1153 funcAddress += funcDelta; 1154 1155 const MachODefinedAtom *func = nullptr; 1156 Reference::Addend addend; 1157 func = findAtomCoveringAddress(normalizedFile, file, funcAddress, 1158 addend); 1159 atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), 1160 handler.unwindRefToPersonalityFunctionKind(), 1161 PersonalityFunctionField, func, addend); 1162 } 1163 } else if (atom->begin() != atom->end()) { 1164 // Otherwise, we expect there to be no relocations in this atom as the only 1165 // relocation would have been to the personality function. 1166 return llvm::make_error<GenericError>("unexpected relocation in CIE"); 1167 } 1168 1169 1170 cieInfos[atom] = std::move(cieInfo); 1171 1172 return llvm::Error::success(); 1173} 1174 1175static llvm::Error processFDE(const NormalizedFile &normalizedFile, 1176 MachOFile &file, 1177 mach_o::ArchHandler &handler, 1178 const Section *ehFrameSection, 1179 MachODefinedAtom *atom, 1180 uint64_t offset, 1181 const CIEInfoMap &cieInfos) { 1182 1183 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); 1184 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); 1185 1186 // Compiler wasn't lazy and actually told us what it meant. 1187 // Unfortunately, the compiler may not have generated references for all of 1188 // [cie, func, lsda] and so we still need to parse the FDE and add references 1189 // for any the compiler didn't generate. 1190 if (atom->begin() != atom->end()) 1191 atom->sortReferences(); 1192 1193 DefinedAtom::reference_iterator CurrentRef = atom->begin(); 1194 1195 // This helper returns the reference (if one exists) at the offset we are 1196 // currently processing. It automatically increments the ref iterator if we 1197 // do return a ref, and throws an error if we pass over a ref without 1198 // comsuming it. 1199 auto currentRefGetter = [&CurrentRef, 1200 &atom](uint64_t Offset)->const Reference* { 1201 // If there are no more refs found, then we are done. 1202 if (CurrentRef == atom->end()) 1203 return nullptr; 1204 1205 const Reference *Ref = *CurrentRef; 1206 1207 // If we haven't reached the offset for this reference, then return that 1208 // we don't yet have a reference to process. 1209 if (Offset < Ref->offsetInAtom()) 1210 return nullptr; 1211 1212 // If the offset is equal, then we want to process this ref. 1213 if (Offset == Ref->offsetInAtom()) { 1214 ++CurrentRef; 1215 return Ref; 1216 } 1217 1218 // The current ref is at an offset which is earlier than the current 1219 // offset, then we failed to consume it when we should have. In this case 1220 // throw an error. 1221 llvm::report_fatal_error("Skipped reference when processing FDE"); 1222 }; 1223 1224 // Helper to either get the reference at this current location, and verify 1225 // that it is of the expected type, or add a reference of that type. 1226 // Returns the reference target. 1227 auto verifyOrAddReference = [&](uint64_t targetAddress, 1228 Reference::KindValue refKind, 1229 uint64_t refAddress, 1230 bool allowsAddend)->const Atom* { 1231 if (auto *ref = currentRefGetter(refAddress)) { 1232 // The compiler already emitted a relocation for the CIE ref. This should 1233 // have been converted to the correct type of reference in 1234 // get[Pair]ReferenceInfo(). 1235 assert(ref->kindValue() == refKind && 1236 "Incorrect EHFrame reference kind"); 1237 return ref->target(); 1238 } 1239 Reference::Addend addend; 1240 auto *target = findAtomCoveringAddress(normalizedFile, file, 1241 targetAddress, addend); 1242 atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), 1243 refKind, refAddress, target, addend); 1244 1245 if (!allowsAddend) 1246 assert(!addend && "EHFrame reference cannot have addend"); 1247 return target; 1248 }; 1249 1250 const uint8_t *startFrameData = atom->rawContent().data(); 1251 const uint8_t *frameData = startFrameData; 1252 1253 uint32_t size = read32(frameData, isBig); 1254 uint64_t cieFieldInFDE = size == 0xffffffffU 1255 ? sizeof(uint32_t) + sizeof(uint64_t) 1256 : sizeof(uint32_t); 1257 1258 // Linker needs to fixup a reference from the FDE to its parent CIE (a 1259 // 32-bit byte offset backwards in the __eh_frame section). 1260 uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig); 1261 uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE; 1262 cieAddress -= cieDelta; 1263 1264 auto *cieRefTarget = verifyOrAddReference(cieAddress, 1265 handler.unwindRefToCIEKind(), 1266 cieFieldInFDE, false); 1267 const MachODefinedAtom *cie = dyn_cast<MachODefinedAtom>(cieRefTarget); 1268 assert(cie && cie->contentType() == DefinedAtom::typeCFI && 1269 "FDE's CIE field does not point at the start of a CIE."); 1270 1271 const CIEInfo &cieInfo = cieInfos.find(cie)->second; 1272 1273 // Linker needs to fixup reference from the FDE to the function it's 1274 // describing. FIXME: there are actually different ways to do this, and the 1275 // particular method used is specified in the CIE's augmentation fields 1276 // (hopefully) 1277 uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t); 1278 1279 int64_t functionFromFDE = readSPtr(is64, isBig, 1280 frameData + rangeFieldInFDE); 1281 uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE; 1282 rangeStart += functionFromFDE; 1283 1284 verifyOrAddReference(rangeStart, 1285 handler.unwindRefToFunctionKind(), 1286 rangeFieldInFDE, true); 1287 1288 // Handle the augmentation data if there is any. 1289 if (cieInfo._augmentationDataPresent) { 1290 // First process the augmentation data length field. 1291 uint64_t augmentationDataLengthFieldInFDE = 1292 rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t)); 1293 unsigned lengthFieldSize = 0; 1294 uint64_t augmentationDataLength = 1295 llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE, 1296 &lengthFieldSize); 1297 1298 if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) { 1299 1300 // Look at the augmentation data field. 1301 uint64_t augmentationDataFieldInFDE = 1302 augmentationDataLengthFieldInFDE + lengthFieldSize; 1303 1304 int64_t lsdaFromFDE = readSPtr(is64, isBig, 1305 frameData + augmentationDataFieldInFDE); 1306 uint64_t lsdaStart = 1307 ehFrameSection->address + offset + augmentationDataFieldInFDE + 1308 lsdaFromFDE; 1309 1310 verifyOrAddReference(lsdaStart, 1311 handler.unwindRefToFunctionKind(), 1312 augmentationDataFieldInFDE, true); 1313 } 1314 } 1315 1316 return llvm::Error::success(); 1317} 1318 1319llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile, 1320 MachOFile &file, 1321 mach_o::ArchHandler &handler) { 1322 1323 const Section *ehFrameSection = nullptr; 1324 for (auto §ion : normalizedFile.sections) 1325 if (section.segmentName == "__TEXT" && 1326 section.sectionName == "__eh_frame") { 1327 ehFrameSection = §ion; 1328 break; 1329 } 1330 1331 // No __eh_frame so nothing to do. 1332 if (!ehFrameSection) 1333 return llvm::Error::success(); 1334 1335 llvm::Error ehFrameErr = llvm::Error::success(); 1336 CIEInfoMap cieInfos; 1337 1338 file.eachAtomInSection(*ehFrameSection, 1339 [&](MachODefinedAtom *atom, uint64_t offset) -> void { 1340 assert(atom->contentType() == DefinedAtom::typeCFI); 1341 1342 // Bail out if we've encountered an error. 1343 if (ehFrameErr) 1344 return; 1345 1346 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); 1347 if (ArchHandler::isDwarfCIE(isBig, atom)) 1348 ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection, 1349 atom, offset, cieInfos); 1350 else 1351 ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection, 1352 atom, offset, cieInfos); 1353 }); 1354 1355 return ehFrameErr; 1356} 1357 1358llvm::Error parseObjCImageInfo(const Section §, 1359 const NormalizedFile &normalizedFile, 1360 MachOFile &file) { 1361 1362 // struct objc_image_info { 1363 // uint32_t version; // initially 0 1364 // uint32_t flags; 1365 // }; 1366 1367 ArrayRef<uint8_t> content = sect.content; 1368 if (content.size() != 8) 1369 return llvm::make_error<GenericError>(sect.segmentName + "/" + 1370 sect.sectionName + 1371 " in file " + file.path() + 1372 " should be 8 bytes in size"); 1373 1374 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); 1375 uint32_t version = read32(content.data(), isBig); 1376 if (version) 1377 return llvm::make_error<GenericError>(sect.segmentName + "/" + 1378 sect.sectionName + 1379 " in file " + file.path() + 1380 " should have version=0"); 1381 1382 uint32_t flags = read32(content.data() + 4, isBig); 1383 if (flags & (MachOLinkingContext::objc_supports_gc | 1384 MachOLinkingContext::objc_gc_only)) 1385 return llvm::make_error<GenericError>(sect.segmentName + "/" + 1386 sect.sectionName + 1387 " in file " + file.path() + 1388 " uses GC. This is not supported"); 1389 1390 if (flags & MachOLinkingContext::objc_retainReleaseForSimulator) 1391 file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator); 1392 else 1393 file.setObjcConstraint(MachOLinkingContext::objc_retainRelease); 1394 1395 file.setSwiftVersion((flags >> 8) & 0xFF); 1396 1397 return llvm::Error::success(); 1398} 1399 1400/// Converts normalized mach-o file into an lld::File and lld::Atoms. 1401llvm::Expected<std::unique_ptr<lld::File>> 1402objectToAtoms(const NormalizedFile &normalizedFile, StringRef path, 1403 bool copyRefs) { 1404 std::unique_ptr<MachOFile> file(new MachOFile(path)); 1405 if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs)) 1406 return std::move(ec); 1407 return std::unique_ptr<File>(std::move(file)); 1408} 1409 1410llvm::Expected<std::unique_ptr<lld::File>> 1411dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path, 1412 bool copyRefs) { 1413 // Instantiate SharedLibraryFile object. 1414 std::unique_ptr<MachODylibFile> file(new MachODylibFile(path)); 1415 if (auto ec = normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs)) 1416 return std::move(ec); 1417 return std::unique_ptr<File>(std::move(file)); 1418} 1419 1420} // anonymous namespace 1421 1422namespace normalized { 1423 1424static bool isObjCImageInfo(const Section §) { 1425 return (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info") || 1426 (sect.segmentName == "__DATA" && sect.sectionName == "__objc_imageinfo"); 1427} 1428 1429llvm::Error 1430normalizedObjectToAtoms(MachOFile *file, 1431 const NormalizedFile &normalizedFile, 1432 bool copyRefs) { 1433 LLVM_DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: " 1434 << file->path() << "\n"); 1435 bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0); 1436 1437 // Create atoms from each section. 1438 for (auto § : normalizedFile.sections) { 1439 1440 // If this is a debug-info section parse it specially. 1441 if (isDebugInfoSection(sect)) 1442 continue; 1443 1444 // If the file contains an objc_image_info struct, then we should parse the 1445 // ObjC flags and Swift version. 1446 if (isObjCImageInfo(sect)) { 1447 if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file)) 1448 return ec; 1449 // We then skip adding atoms for this section as we use the ObjCPass to 1450 // re-emit this data after it has been aggregated for all files. 1451 continue; 1452 } 1453 1454 bool customSectionName; 1455 DefinedAtom::ContentType atomType = atomTypeFromSection(sect, 1456 customSectionName); 1457 if (auto ec = processSection(atomType, sect, customSectionName, 1458 normalizedFile, *file, scatterable, copyRefs)) 1459 return ec; 1460 } 1461 // Create atoms from undefined symbols. 1462 for (auto &sym : normalizedFile.undefinedSymbols) { 1463 // Undefined symbols with n_value != 0 are actually tentative definitions. 1464 if (sym.value == Hex64(0)) { 1465 file->addUndefinedAtom(sym.name, copyRefs); 1466 } else { 1467 file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value, 1468 DefinedAtom::Alignment(1 << (sym.desc >> 8)), 1469 copyRefs); 1470 } 1471 } 1472 1473 // Convert mach-o relocations to References 1474 std::unique_ptr<mach_o::ArchHandler> handler 1475 = ArchHandler::create(normalizedFile.arch); 1476 for (auto § : normalizedFile.sections) { 1477 if (isDebugInfoSection(sect)) 1478 continue; 1479 if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable, 1480 *file, *handler)) 1481 return ec; 1482 } 1483 1484 // Add additional arch-specific References 1485 file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void { 1486 handler->addAdditionalReferences(*atom); 1487 }); 1488 1489 // Each __eh_frame section needs references to both __text (the function we're 1490 // providing unwind info for) and itself (FDE -> CIE). These aren't 1491 // represented in the relocations on some architectures, so we have to add 1492 // them back in manually there. 1493 if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler)) 1494 return ec; 1495 1496 // Process mach-o data-in-code regions array. That information is encoded in 1497 // atoms as References at each transition point. 1498 unsigned nextIndex = 0; 1499 for (const DataInCode &entry : normalizedFile.dataInCode) { 1500 ++nextIndex; 1501 const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset); 1502 if (!s) { 1503 return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address (" 1504 + Twine(entry.offset) 1505 + ") is not in any section")); 1506 } 1507 uint64_t offsetInSect = entry.offset - s->address; 1508 uint32_t offsetInAtom; 1509 MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect, 1510 &offsetInAtom); 1511 if (offsetInAtom + entry.length > atom->size()) { 1512 return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE entry " 1513 "(offset=" 1514 + Twine(entry.offset) 1515 + ", length=" 1516 + Twine(entry.length) 1517 + ") crosses atom boundary.")); 1518 } 1519 // Add reference that marks start of data-in-code. 1520 atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), 1521 handler->dataInCodeTransitionStart(*atom), 1522 offsetInAtom, atom, entry.kind); 1523 1524 // Peek at next entry, if it starts where this one ends, skip ending ref. 1525 if (nextIndex < normalizedFile.dataInCode.size()) { 1526 const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex]; 1527 if (nextEntry.offset == (entry.offset + entry.length)) 1528 continue; 1529 } 1530 1531 // If data goes to end of function, skip ending ref. 1532 if ((offsetInAtom + entry.length) == atom->size()) 1533 continue; 1534 1535 // Add reference that marks end of data-in-code. 1536 atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), 1537 handler->dataInCodeTransitionEnd(*atom), 1538 offsetInAtom+entry.length, atom, 0); 1539 } 1540 1541 // Cache some attributes on the file for use later. 1542 file->setFlags(normalizedFile.flags); 1543 file->setArch(normalizedFile.arch); 1544 file->setOS(normalizedFile.os); 1545 file->setMinVersion(normalizedFile.minOSverson); 1546 file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind); 1547 1548 // Sort references in each atom to their canonical order. 1549 for (const DefinedAtom* defAtom : file->defined()) { 1550 reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences(); 1551 } 1552 1553 if (auto err = parseDebugInfo(*file, normalizedFile, copyRefs)) 1554 return err; 1555 1556 return llvm::Error::success(); 1557} 1558 1559llvm::Error 1560normalizedDylibToAtoms(MachODylibFile *file, 1561 const NormalizedFile &normalizedFile, 1562 bool copyRefs) { 1563 file->setInstallName(normalizedFile.installName); 1564 file->setCompatVersion(normalizedFile.compatVersion); 1565 file->setCurrentVersion(normalizedFile.currentVersion); 1566 1567 // Tell MachODylibFile object about all symbols it exports. 1568 if (!normalizedFile.exportInfo.empty()) { 1569 // If exports trie exists, use it instead of traditional symbol table. 1570 for (const Export &exp : normalizedFile.exportInfo) { 1571 bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION); 1572 // StringRefs from export iterator are ephemeral, so force copy. 1573 file->addExportedSymbol(exp.name, weakDef, true); 1574 } 1575 } else { 1576 for (auto &sym : normalizedFile.globalSymbols) { 1577 assert((sym.scope & N_EXT) && "only expect external symbols here"); 1578 bool weakDef = (sym.desc & N_WEAK_DEF); 1579 file->addExportedSymbol(sym.name, weakDef, copyRefs); 1580 } 1581 } 1582 // Tell MachODylibFile object about all dylibs it re-exports. 1583 for (const DependentDylib &dep : normalizedFile.dependentDylibs) { 1584 if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB) 1585 file->addReExportedDylib(dep.path); 1586 } 1587 return llvm::Error::success(); 1588} 1589 1590void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType, 1591 StringRef &segmentName, 1592 StringRef §ionName, 1593 SectionType §ionType, 1594 SectionAttr §ionAttrs, 1595 bool &relocsToDefinedCanBeImplicit) { 1596 1597 for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; 1598 p->atomType != DefinedAtom::typeUnknown; ++p) { 1599 if (p->atomType != atomType) 1600 continue; 1601 // Wild carded entries are ignored for reverse lookups. 1602 if (p->segmentName.empty() || p->sectionName.empty()) 1603 continue; 1604 segmentName = p->segmentName; 1605 sectionName = p->sectionName; 1606 sectionType = p->sectionType; 1607 sectionAttrs = 0; 1608 relocsToDefinedCanBeImplicit = false; 1609 if (atomType == DefinedAtom::typeCode) 1610 sectionAttrs = S_ATTR_PURE_INSTRUCTIONS; 1611 if (atomType == DefinedAtom::typeCFI) 1612 relocsToDefinedCanBeImplicit = true; 1613 return; 1614 } 1615 llvm_unreachable("content type not yet supported"); 1616} 1617 1618llvm::Expected<std::unique_ptr<lld::File>> 1619normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path, 1620 bool copyRefs) { 1621 switch (normalizedFile.fileType) { 1622 case MH_DYLIB: 1623 case MH_DYLIB_STUB: 1624 return dylibToAtoms(normalizedFile, path, copyRefs); 1625 case MH_OBJECT: 1626 return objectToAtoms(normalizedFile, path, copyRefs); 1627 default: 1628 llvm_unreachable("unhandled MachO file type!"); 1629 } 1630} 1631 1632} // namespace normalized 1633} // namespace mach_o 1634} // namespace lld 1635