// MachONormalizedFileBinaryReader.cpp revision 303239
1//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp ---------===// 2// 3// The LLVM Linker 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10/// 11/// \file For mach-o object files, this implementation converts from 12/// mach-o on-disk binary format to in-memory normalized mach-o. 13/// 14/// +---------------+ 15/// | binary mach-o | 16/// +---------------+ 17/// | 18/// | 19/// v 20/// +------------+ 21/// | normalized | 22/// +------------+ 23 24#include "MachONormalizedFile.h" 25#include "ArchHandler.h" 26#include "MachONormalizedFileBinaryUtils.h" 27#include "lld/Core/Error.h" 28#include "lld/Core/LLVM.h" 29#include "lld/Core/SharedLibraryFile.h" 30#include "llvm/ADT/SmallString.h" 31#include "llvm/ADT/StringRef.h" 32#include "llvm/ADT/StringSwitch.h" 33#include "llvm/ADT/STLExtras.h" 34#include "llvm/ADT/Twine.h" 35#include "llvm/Object/MachO.h" 36#include "llvm/Support/Casting.h" 37#include "llvm/Support/Errc.h" 38#include "llvm/Support/ErrorHandling.h" 39#include "llvm/Support/FileOutputBuffer.h" 40#include "llvm/Support/Host.h" 41#include "llvm/Support/MachO.h" 42#include "llvm/Support/MemoryBuffer.h" 43#include "llvm/Support/raw_ostream.h" 44#include <functional> 45#include <system_error> 46 47using namespace llvm::MachO; 48using llvm::object::ExportEntry; 49using llvm::object::MachOObjectFile; 50 51namespace lld { 52namespace mach_o { 53namespace normalized { 54 55// Utility to call a lambda expression on each load command. 
56static llvm::Error forEachLoadCommand( 57 StringRef lcRange, unsigned lcCount, bool isBig, bool is64, 58 std::function<bool(uint32_t cmd, uint32_t size, const char *lc)> func) { 59 const char* p = lcRange.begin(); 60 for (unsigned i=0; i < lcCount; ++i) { 61 const load_command *lc = reinterpret_cast<const load_command*>(p); 62 load_command lcCopy; 63 const load_command *slc = lc; 64 if (isBig != llvm::sys::IsBigEndianHost) { 65 memcpy(&lcCopy, lc, sizeof(load_command)); 66 swapStruct(lcCopy); 67 slc = &lcCopy; 68 } 69 if ( (p + slc->cmdsize) > lcRange.end() ) 70 return llvm::make_error<GenericError>("Load command exceeds range"); 71 72 if (func(slc->cmd, slc->cmdsize, p)) 73 return llvm::Error(); 74 75 p += slc->cmdsize; 76 } 77 78 return llvm::Error(); 79} 80 81static std::error_code appendRelocations(Relocations &relocs, StringRef buffer, 82 bool bigEndian, 83 uint32_t reloff, uint32_t nreloc) { 84 if ((reloff + nreloc*8) > buffer.size()) 85 return make_error_code(llvm::errc::executable_format_error); 86 const any_relocation_info* relocsArray = 87 reinterpret_cast<const any_relocation_info*>(buffer.begin()+reloff); 88 89 for(uint32_t i=0; i < nreloc; ++i) { 90 relocs.push_back(unpackRelocation(relocsArray[i], bigEndian)); 91 } 92 return std::error_code(); 93} 94 95static std::error_code 96appendIndirectSymbols(IndirectSymbols &isyms, StringRef buffer, bool isBig, 97 uint32_t istOffset, uint32_t istCount, 98 uint32_t startIndex, uint32_t count) { 99 if ((istOffset + istCount*4) > buffer.size()) 100 return make_error_code(llvm::errc::executable_format_error); 101 if (startIndex+count > istCount) 102 return make_error_code(llvm::errc::executable_format_error); 103 const uint8_t *indirectSymbolArray = (const uint8_t *)buffer.data(); 104 105 for(uint32_t i=0; i < count; ++i) { 106 isyms.push_back(read32( 107 indirectSymbolArray + (startIndex + i) * sizeof(uint32_t), isBig)); 108 } 109 return std::error_code(); 110} 111 112 113template <typename T> static T 
readBigEndian(T t) { 114 if (llvm::sys::IsLittleEndianHost) 115 llvm::sys::swapByteOrder(t); 116 return t; 117} 118 119 120static bool isMachOHeader(const mach_header *mh, bool &is64, bool &isBig) { 121 switch (read32(&mh->magic, false)) { 122 case llvm::MachO::MH_MAGIC: 123 is64 = false; 124 isBig = false; 125 return true; 126 case llvm::MachO::MH_MAGIC_64: 127 is64 = true; 128 isBig = false; 129 return true; 130 case llvm::MachO::MH_CIGAM: 131 is64 = false; 132 isBig = true; 133 return true; 134 case llvm::MachO::MH_CIGAM_64: 135 is64 = true; 136 isBig = true; 137 return true; 138 default: 139 return false; 140 } 141} 142 143 144bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch) { 145 // Try opening and mapping file at path. 146 ErrorOr<std::unique_ptr<MemoryBuffer>> b = MemoryBuffer::getFileOrSTDIN(path); 147 if (b.getError()) 148 return false; 149 150 // If file length < 32 it is too small to be mach-o object file. 151 StringRef fileBuffer = b->get()->getBuffer(); 152 if (fileBuffer.size() < 32) 153 return false; 154 155 // If file buffer does not start with MH_MAGIC (and variants), not obj file. 156 const mach_header *mh = reinterpret_cast<const mach_header *>( 157 fileBuffer.begin()); 158 bool is64, isBig; 159 if (!isMachOHeader(mh, is64, isBig)) 160 return false; 161 162 // If not MH_OBJECT, not object file. 163 if (read32(&mh->filetype, isBig) != MH_OBJECT) 164 return false; 165 166 // Lookup up arch from cpu/subtype pair. 
167 arch = MachOLinkingContext::archFromCpuType( 168 read32(&mh->cputype, isBig), 169 read32(&mh->cpusubtype, isBig)); 170 return true; 171} 172 173bool sliceFromFatFile(MemoryBufferRef mb, MachOLinkingContext::Arch arch, 174 uint32_t &offset, uint32_t &size) { 175 const char *start = mb.getBufferStart(); 176 const llvm::MachO::fat_header *fh = 177 reinterpret_cast<const llvm::MachO::fat_header *>(start); 178 if (readBigEndian(fh->magic) != llvm::MachO::FAT_MAGIC) 179 return false; 180 uint32_t nfat_arch = readBigEndian(fh->nfat_arch); 181 const fat_arch *fstart = 182 reinterpret_cast<const fat_arch *>(start + sizeof(fat_header)); 183 const fat_arch *fend = 184 reinterpret_cast<const fat_arch *>(start + sizeof(fat_header) + 185 sizeof(fat_arch) * nfat_arch); 186 const uint32_t reqCpuType = MachOLinkingContext::cpuTypeFromArch(arch); 187 const uint32_t reqCpuSubtype = MachOLinkingContext::cpuSubtypeFromArch(arch); 188 for (const fat_arch *fa = fstart; fa < fend; ++fa) { 189 if ((readBigEndian(fa->cputype) == reqCpuType) && 190 (readBigEndian(fa->cpusubtype) == reqCpuSubtype)) { 191 offset = readBigEndian(fa->offset); 192 size = readBigEndian(fa->size); 193 if ((offset + size) > mb.getBufferSize()) 194 return false; 195 return true; 196 } 197 } 198 return false; 199} 200 201/// Reads a mach-o file and produces an in-memory normalized view. 202llvm::Expected<std::unique_ptr<NormalizedFile>> 203readBinary(std::unique_ptr<MemoryBuffer> &mb, 204 const MachOLinkingContext::Arch arch) { 205 // Make empty NormalizedFile. 
206 std::unique_ptr<NormalizedFile> f(new NormalizedFile()); 207 208 const char *start = mb->getBufferStart(); 209 size_t objSize = mb->getBufferSize(); 210 const mach_header *mh = reinterpret_cast<const mach_header *>(start); 211 212 uint32_t sliceOffset; 213 uint32_t sliceSize; 214 if (sliceFromFatFile(mb->getMemBufferRef(), arch, sliceOffset, sliceSize)) { 215 start = &start[sliceOffset]; 216 objSize = sliceSize; 217 mh = reinterpret_cast<const mach_header *>(start); 218 } 219 220 // Determine endianness and pointer size for mach-o file. 221 bool is64, isBig; 222 if (!isMachOHeader(mh, is64, isBig)) 223 return llvm::make_error<GenericError>("File is not a mach-o"); 224 225 // Endian swap header, if needed. 226 mach_header headerCopy; 227 const mach_header *smh = mh; 228 if (isBig != llvm::sys::IsBigEndianHost) { 229 memcpy(&headerCopy, mh, sizeof(mach_header)); 230 swapStruct(headerCopy); 231 smh = &headerCopy; 232 } 233 234 // Validate head and load commands fit in buffer. 235 const uint32_t lcCount = smh->ncmds; 236 const char *lcStart = 237 start + (is64 ? sizeof(mach_header_64) : sizeof(mach_header)); 238 StringRef lcRange(lcStart, smh->sizeofcmds); 239 if (lcRange.end() > (start + objSize)) 240 return llvm::make_error<GenericError>("Load commands exceed file size"); 241 242 // Get architecture from mach_header. 243 f->arch = MachOLinkingContext::archFromCpuType(smh->cputype, smh->cpusubtype); 244 if (f->arch != arch) { 245 return llvm::make_error<GenericError>( 246 Twine("file is wrong architecture. Expected " 247 "(" + MachOLinkingContext::nameFromArch(arch) 248 + ") found (" 249 + MachOLinkingContext::nameFromArch(f->arch) 250 + ")" )); 251 } 252 // Copy file type and flags 253 f->fileType = HeaderFileType(smh->filetype); 254 f->flags = smh->flags; 255 256 257 // Pre-scan load commands looking for indirect symbol table. 
258 uint32_t indirectSymbolTableOffset = 0; 259 uint32_t indirectSymbolTableCount = 0; 260 auto ec = forEachLoadCommand(lcRange, lcCount, isBig, is64, 261 [&](uint32_t cmd, uint32_t size, 262 const char *lc) -> bool { 263 if (cmd == LC_DYSYMTAB) { 264 const dysymtab_command *d = reinterpret_cast<const dysymtab_command*>(lc); 265 indirectSymbolTableOffset = read32(&d->indirectsymoff, isBig); 266 indirectSymbolTableCount = read32(&d->nindirectsyms, isBig); 267 return true; 268 } 269 return false; 270 }); 271 if (ec) 272 return std::move(ec); 273 274 // Walk load commands looking for segments/sections and the symbol table. 275 const data_in_code_entry *dataInCode = nullptr; 276 const dyld_info_command *dyldInfo = nullptr; 277 uint32_t dataInCodeSize = 0; 278 ec = forEachLoadCommand(lcRange, lcCount, isBig, is64, 279 [&] (uint32_t cmd, uint32_t size, const char* lc) -> bool { 280 switch(cmd) { 281 case LC_SEGMENT_64: 282 if (is64) { 283 const segment_command_64 *seg = 284 reinterpret_cast<const segment_command_64*>(lc); 285 const unsigned sectionCount = read32(&seg->nsects, isBig); 286 const section_64 *sects = reinterpret_cast<const section_64*> 287 (lc + sizeof(segment_command_64)); 288 const unsigned lcSize = sizeof(segment_command_64) 289 + sectionCount*sizeof(section_64); 290 // Verify sections don't extend beyond end of segment load command. 
291 if (lcSize > size) 292 return true; 293 for (unsigned i=0; i < sectionCount; ++i) { 294 const section_64 *sect = §s[i]; 295 Section section; 296 section.segmentName = getString16(sect->segname); 297 section.sectionName = getString16(sect->sectname); 298 section.type = (SectionType)(read32(§->flags, isBig) & 299 SECTION_TYPE); 300 section.attributes = read32(§->flags, isBig) & SECTION_ATTRIBUTES; 301 section.alignment = 1 << read32(§->align, isBig); 302 section.address = read64(§->addr, isBig); 303 const uint8_t *content = 304 (const uint8_t *)start + read32(§->offset, isBig); 305 size_t contentSize = read64(§->size, isBig); 306 // Note: this assign() is copying the content bytes. Ideally, 307 // we can use a custom allocator for vector to avoid the copy. 308 section.content = llvm::makeArrayRef(content, contentSize); 309 appendRelocations(section.relocations, mb->getBuffer(), isBig, 310 read32(§->reloff, isBig), 311 read32(§->nreloc, isBig)); 312 if (section.type == S_NON_LAZY_SYMBOL_POINTERS) { 313 appendIndirectSymbols(section.indirectSymbols, mb->getBuffer(), 314 isBig, 315 indirectSymbolTableOffset, 316 indirectSymbolTableCount, 317 read32(§->reserved1, isBig), 318 contentSize/4); 319 } 320 f->sections.push_back(section); 321 } 322 } 323 break; 324 case LC_SEGMENT: 325 if (!is64) { 326 const segment_command *seg = 327 reinterpret_cast<const segment_command*>(lc); 328 const unsigned sectionCount = read32(&seg->nsects, isBig); 329 const section *sects = reinterpret_cast<const section*> 330 (lc + sizeof(segment_command)); 331 const unsigned lcSize = sizeof(segment_command) 332 + sectionCount*sizeof(section); 333 // Verify sections don't extend beyond end of segment load command. 
334 if (lcSize > size) 335 return true; 336 for (unsigned i=0; i < sectionCount; ++i) { 337 const section *sect = §s[i]; 338 Section section; 339 section.segmentName = getString16(sect->segname); 340 section.sectionName = getString16(sect->sectname); 341 section.type = (SectionType)(read32(§->flags, isBig) & 342 SECTION_TYPE); 343 section.attributes = 344 read32((const uint8_t *)§->flags, isBig) & SECTION_ATTRIBUTES; 345 section.alignment = 1 << read32(§->align, isBig); 346 section.address = read32(§->addr, isBig); 347 const uint8_t *content = 348 (const uint8_t *)start + read32(§->offset, isBig); 349 size_t contentSize = read32(§->size, isBig); 350 // Note: this assign() is copying the content bytes. Ideally, 351 // we can use a custom allocator for vector to avoid the copy. 352 section.content = llvm::makeArrayRef(content, contentSize); 353 appendRelocations(section.relocations, mb->getBuffer(), isBig, 354 read32(§->reloff, isBig), 355 read32(§->nreloc, isBig)); 356 if (section.type == S_NON_LAZY_SYMBOL_POINTERS) { 357 appendIndirectSymbols( 358 section.indirectSymbols, mb->getBuffer(), isBig, 359 indirectSymbolTableOffset, indirectSymbolTableCount, 360 read32(§->reserved1, isBig), contentSize / 4); 361 } 362 f->sections.push_back(section); 363 } 364 } 365 break; 366 case LC_SYMTAB: { 367 const symtab_command *st = reinterpret_cast<const symtab_command*>(lc); 368 const char *strings = start + read32(&st->stroff, isBig); 369 const uint32_t strSize = read32(&st->strsize, isBig); 370 // Validate string pool and symbol table all in buffer. 
371 if (read32((const uint8_t *)&st->stroff, isBig) + 372 read32((const uint8_t *)&st->strsize, isBig) > 373 objSize) 374 return true; 375 if (is64) { 376 const uint32_t symOffset = read32(&st->symoff, isBig); 377 const uint32_t symCount = read32(&st->nsyms, isBig); 378 if ( symOffset+(symCount*sizeof(nlist_64)) > objSize) 379 return true; 380 const nlist_64 *symbols = 381 reinterpret_cast<const nlist_64 *>(start + symOffset); 382 // Convert each nlist_64 to a lld::mach_o::normalized::Symbol. 383 for(uint32_t i=0; i < symCount; ++i) { 384 nlist_64 tempSym; 385 memcpy(&tempSym, &symbols[i], sizeof(nlist_64)); 386 const nlist_64 *sin = &tempSym; 387 if (isBig != llvm::sys::IsBigEndianHost) 388 swapStruct(tempSym); 389 Symbol sout; 390 if (sin->n_strx > strSize) 391 return true; 392 sout.name = &strings[sin->n_strx]; 393 sout.type = (NListType)(sin->n_type & N_TYPE); 394 sout.scope = (sin->n_type & (N_PEXT|N_EXT)); 395 sout.sect = sin->n_sect; 396 sout.desc = sin->n_desc; 397 sout.value = sin->n_value; 398 if (sout.type == N_UNDF) 399 f->undefinedSymbols.push_back(sout); 400 else if (sin->n_type & N_EXT) 401 f->globalSymbols.push_back(sout); 402 else 403 f->localSymbols.push_back(sout); 404 } 405 } else { 406 const uint32_t symOffset = read32(&st->symoff, isBig); 407 const uint32_t symCount = read32(&st->nsyms, isBig); 408 if ( symOffset+(symCount*sizeof(nlist)) > objSize) 409 return true; 410 const nlist *symbols = 411 reinterpret_cast<const nlist *>(start + symOffset); 412 // Convert each nlist to a lld::mach_o::normalized::Symbol. 
413 for(uint32_t i=0; i < symCount; ++i) { 414 const nlist *sin = &symbols[i]; 415 nlist tempSym; 416 if (isBig != llvm::sys::IsBigEndianHost) { 417 tempSym = *sin; swapStruct(tempSym); sin = &tempSym; 418 } 419 Symbol sout; 420 if (sin->n_strx > strSize) 421 return true; 422 sout.name = &strings[sin->n_strx]; 423 sout.type = (NListType)(sin->n_type & N_TYPE); 424 sout.scope = (sin->n_type & (N_PEXT|N_EXT)); 425 sout.sect = sin->n_sect; 426 sout.desc = sin->n_desc; 427 sout.value = sin->n_value; 428 if (sout.type == N_UNDF) 429 f->undefinedSymbols.push_back(sout); 430 else if (sout.scope == (SymbolScope)N_EXT) 431 f->globalSymbols.push_back(sout); 432 else 433 f->localSymbols.push_back(sout); 434 } 435 } 436 } 437 break; 438 case LC_ID_DYLIB: { 439 const dylib_command *dl = reinterpret_cast<const dylib_command*>(lc); 440 f->installName = lc + read32(&dl->dylib.name, isBig); 441 f->currentVersion = read32(&dl->dylib.current_version, isBig); 442 f->compatVersion = read32(&dl->dylib.compatibility_version, isBig); 443 } 444 break; 445 case LC_DATA_IN_CODE: { 446 const linkedit_data_command *ldc = 447 reinterpret_cast<const linkedit_data_command*>(lc); 448 dataInCode = reinterpret_cast<const data_in_code_entry *>( 449 start + read32(&ldc->dataoff, isBig)); 450 dataInCodeSize = read32(&ldc->datasize, isBig); 451 } 452 break; 453 case LC_LOAD_DYLIB: 454 case LC_LOAD_WEAK_DYLIB: 455 case LC_REEXPORT_DYLIB: 456 case LC_LOAD_UPWARD_DYLIB: { 457 const dylib_command *dl = reinterpret_cast<const dylib_command*>(lc); 458 DependentDylib entry; 459 entry.path = lc + read32(&dl->dylib.name, isBig); 460 entry.kind = LoadCommandType(cmd); 461 entry.compatVersion = read32(&dl->dylib.compatibility_version, isBig); 462 entry.currentVersion = read32(&dl->dylib.current_version, isBig); 463 f->dependentDylibs.push_back(entry); 464 } 465 break; 466 case LC_RPATH: { 467 const rpath_command *rpc = reinterpret_cast<const rpath_command *>(lc); 468 f->rpaths.push_back(lc + read32(&rpc->path, 
isBig)); 469 } 470 break; 471 case LC_DYLD_INFO: 472 case LC_DYLD_INFO_ONLY: 473 dyldInfo = reinterpret_cast<const dyld_info_command*>(lc); 474 break; 475 case LC_VERSION_MIN_MACOSX: 476 case LC_VERSION_MIN_IPHONEOS: 477 case LC_VERSION_MIN_WATCHOS: 478 case LC_VERSION_MIN_TVOS: 479 // If we are emitting an object file, then we may take the load command 480 // kind from these commands and pass it on to the output 481 // file. 482 f->minOSVersionKind = (LoadCommandType)cmd; 483 break; 484 } 485 return false; 486 }); 487 if (ec) 488 return std::move(ec); 489 490 if (dataInCode) { 491 // Convert on-disk data_in_code_entry array to DataInCode vector. 492 for (unsigned i=0; i < dataInCodeSize/sizeof(data_in_code_entry); ++i) { 493 DataInCode entry; 494 entry.offset = read32(&dataInCode[i].offset, isBig); 495 entry.length = read16(&dataInCode[i].length, isBig); 496 entry.kind = 497 (DataRegionType)read16((const uint8_t *)&dataInCode[i].kind, isBig); 498 f->dataInCode.push_back(entry); 499 } 500 } 501 502 if (dyldInfo) { 503 // If any exports, extract and add to normalized exportInfo vector. 
504 if (dyldInfo->export_size) { 505 const uint8_t *trieStart = reinterpret_cast<const uint8_t*>(start + 506 dyldInfo->export_off); 507 ArrayRef<uint8_t> trie(trieStart, dyldInfo->export_size); 508 for (const ExportEntry &trieExport : MachOObjectFile::exports(trie)) { 509 Export normExport; 510 normExport.name = trieExport.name().copy(f->ownedAllocations); 511 normExport.offset = trieExport.address(); 512 normExport.kind = ExportSymbolKind(trieExport.flags() & EXPORT_SYMBOL_FLAGS_KIND_MASK); 513 normExport.flags = trieExport.flags() & ~EXPORT_SYMBOL_FLAGS_KIND_MASK; 514 normExport.otherOffset = trieExport.other(); 515 if (!trieExport.otherName().empty()) 516 normExport.otherName = trieExport.otherName().copy(f->ownedAllocations); 517 f->exportInfo.push_back(normExport); 518 } 519 } 520 } 521 522 return std::move(f); 523} 524 525class MachOObjectReader : public Reader { 526public: 527 MachOObjectReader(MachOLinkingContext &ctx) : _ctx(ctx) {} 528 529 bool canParse(file_magic magic, MemoryBufferRef mb) const override { 530 return (magic == llvm::sys::fs::file_magic::macho_object && 531 mb.getBufferSize() > 32); 532 } 533 534 ErrorOr<std::unique_ptr<File>> 535 loadFile(std::unique_ptr<MemoryBuffer> mb, 536 const Registry ®istry) const override { 537 std::unique_ptr<File> ret = 538 llvm::make_unique<MachOFile>(std::move(mb), &_ctx); 539 return std::move(ret); 540 } 541 542private: 543 MachOLinkingContext &_ctx; 544}; 545 546class MachODylibReader : public Reader { 547public: 548 MachODylibReader(MachOLinkingContext &ctx) : _ctx(ctx) {} 549 550 bool canParse(file_magic magic, MemoryBufferRef mb) const override { 551 switch (magic) { 552 case llvm::sys::fs::file_magic::macho_dynamically_linked_shared_lib: 553 case llvm::sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: 554 return mb.getBufferSize() > 32; 555 default: 556 return false; 557 } 558 } 559 560 ErrorOr<std::unique_ptr<File>> 561 loadFile(std::unique_ptr<MemoryBuffer> mb, 562 const Registry ®istry) 
const override { 563 std::unique_ptr<File> ret = 564 llvm::make_unique<MachODylibFile>(std::move(mb), &_ctx); 565 return std::move(ret); 566 } 567 568private: 569 MachOLinkingContext &_ctx; 570}; 571 572} // namespace normalized 573} // namespace mach_o 574 575void Registry::addSupportMachOObjects(MachOLinkingContext &ctx) { 576 MachOLinkingContext::Arch arch = ctx.arch(); 577 add(std::unique_ptr<Reader>(new mach_o::normalized::MachOObjectReader(ctx))); 578 add(std::unique_ptr<Reader>(new mach_o::normalized::MachODylibReader(ctx))); 579 addKindTable(Reference::KindNamespace::mach_o, ctx.archHandler().kindArch(), 580 ctx.archHandler().kindStrings()); 581 add(std::unique_ptr<YamlIOTaggedDocumentHandler>( 582 new mach_o::MachOYamlIOTaggedDocumentHandler(arch))); 583} 584 585 586} // namespace lld 587