// RuntimeDyld.cpp revision 221337
1//===-- RuntimeDyld.h - Run-time dynamic linker for MC-JIT ------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Implementation of the MC-JIT runtime dynamic linker. 11// 12//===----------------------------------------------------------------------===// 13 14#define DEBUG_TYPE "dyld" 15#include "llvm/ADT/OwningPtr.h" 16#include "llvm/ADT/SmallVector.h" 17#include "llvm/ADT/StringMap.h" 18#include "llvm/ADT/StringRef.h" 19#include "llvm/ADT/STLExtras.h" 20#include "llvm/ADT/Twine.h" 21#include "llvm/ExecutionEngine/RuntimeDyld.h" 22#include "llvm/Object/MachOObject.h" 23#include "llvm/Support/Debug.h" 24#include "llvm/Support/ErrorHandling.h" 25#include "llvm/Support/Format.h" 26#include "llvm/Support/Memory.h" 27#include "llvm/Support/MemoryBuffer.h" 28#include "llvm/Support/system_error.h" 29#include "llvm/Support/raw_ostream.h" 30using namespace llvm; 31using namespace llvm::object; 32 33// Empty out-of-line virtual destructor as the key function. 34RTDyldMemoryManager::~RTDyldMemoryManager() {} 35 36namespace llvm { 37class RuntimeDyldImpl { 38 unsigned CPUType; 39 unsigned CPUSubtype; 40 41 // The MemoryManager to load objects into. 42 RTDyldMemoryManager *MemMgr; 43 44 // FIXME: This all assumes we're dealing with external symbols for anything 45 // explicitly referenced. I.e., we can index by name and things 46 // will work out. In practice, this may not be the case, so we 47 // should find a way to effectively generalize. 48 49 // For each function, we have a MemoryBlock of it's instruction data. 50 StringMap<sys::MemoryBlock> Functions; 51 52 // Master symbol table. As modules are loaded and external symbols are 53 // resolved, their addresses are stored here. 
54 StringMap<uint8_t*> SymbolTable; 55 56 // For each symbol, keep a list of relocations based on it. Anytime 57 // its address is reassigned (the JIT re-compiled the function, e.g.), 58 // the relocations get re-resolved. 59 struct RelocationEntry { 60 std::string Target; // Object this relocation is contained in. 61 uint64_t Offset; // Offset into the object for the relocation. 62 uint32_t Data; // Second word of the raw macho relocation entry. 63 int64_t Addend; // Addend encoded in the instruction itself, if any. 64 bool isResolved; // Has this relocation been resolved previously? 65 66 RelocationEntry(StringRef t, uint64_t offset, uint32_t data, int64_t addend) 67 : Target(t), Offset(offset), Data(data), Addend(addend), 68 isResolved(false) {} 69 }; 70 typedef SmallVector<RelocationEntry, 4> RelocationList; 71 StringMap<RelocationList> Relocations; 72 73 // FIXME: Also keep a map of all the relocations contained in an object. Use 74 // this to dynamically answer whether all of the relocations in it have 75 // been resolved or not. 76 77 bool HasError; 78 std::string ErrorStr; 79 80 // Set the error state and record an error string. 
81 bool Error(const Twine &Msg) { 82 ErrorStr = Msg.str(); 83 HasError = true; 84 return true; 85 } 86 87 void extractFunction(StringRef Name, uint8_t *StartAddress, 88 uint8_t *EndAddress); 89 bool resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel, 90 unsigned Type, unsigned Size); 91 bool resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel, 92 unsigned Type, unsigned Size); 93 bool resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel, 94 unsigned Type, unsigned Size); 95 96 bool loadSegment32(const MachOObject *Obj, 97 const MachOObject::LoadCommandInfo *SegmentLCI, 98 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC); 99 bool loadSegment64(const MachOObject *Obj, 100 const MachOObject::LoadCommandInfo *SegmentLCI, 101 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC); 102 103public: 104 RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {} 105 106 bool loadObject(MemoryBuffer *InputBuffer); 107 108 void *getSymbolAddress(StringRef Name) { 109 // FIXME: Just look up as a function for now. Overly simple of course. 110 // Work in progress. 111 return SymbolTable.lookup(Name); 112 } 113 114 void resolveRelocations(); 115 116 void reassignSymbolAddress(StringRef Name, uint8_t *Addr); 117 118 // Is the linker in an error state? 119 bool hasError() { return HasError; } 120 121 // Mark the error condition as handled and continue. 122 void clearError() { HasError = false; } 123 124 // Get the error message. 125 StringRef getErrorString() { return ErrorStr; } 126}; 127 128void RuntimeDyldImpl::extractFunction(StringRef Name, uint8_t *StartAddress, 129 uint8_t *EndAddress) { 130 // Allocate memory for the function via the memory manager. 
131 uintptr_t Size = EndAddress - StartAddress + 1; 132 uint8_t *Mem = MemMgr->startFunctionBody(Name.data(), Size); 133 assert(Size >= (uint64_t)(EndAddress - StartAddress + 1) && 134 "Memory manager failed to allocate enough memory!"); 135 // Copy the function payload into the memory block. 136 memcpy(Mem, StartAddress, EndAddress - StartAddress + 1); 137 MemMgr->endFunctionBody(Name.data(), Mem, Mem + Size); 138 // Remember where we put it. 139 Functions[Name] = sys::MemoryBlock(Mem, Size); 140 // Default the assigned address for this symbol to wherever this 141 // allocated it. 142 SymbolTable[Name] = Mem; 143 DEBUG(dbgs() << " allocated to " << Mem << "\n"); 144} 145 146bool RuntimeDyldImpl:: 147resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel, 148 unsigned Type, unsigned Size) { 149 // This just dispatches to the proper target specific routine. 150 switch (CPUType) { 151 default: assert(0 && "Unsupported CPU type!"); 152 case mach::CTM_x86_64: 153 return resolveX86_64Relocation((uintptr_t)Address, (uintptr_t)Value, 154 isPCRel, Type, Size); 155 case mach::CTM_ARM: 156 return resolveARMRelocation((uintptr_t)Address, (uintptr_t)Value, 157 isPCRel, Type, Size); 158 } 159 llvm_unreachable(""); 160} 161 162bool RuntimeDyldImpl:: 163resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, 164 bool isPCRel, unsigned Type, 165 unsigned Size) { 166 // If the relocation is PC-relative, the value to be encoded is the 167 // pointer difference. 168 if (isPCRel) 169 // FIXME: It seems this value needs to be adjusted by 4 for an effective PC 170 // address. Is that expected? Only for branches, perhaps? 171 Value -= Address + 4; 172 173 switch(Type) { 174 default: 175 llvm_unreachable("Invalid relocation type!"); 176 case macho::RIT_X86_64_Unsigned: 177 case macho::RIT_X86_64_Branch: { 178 // Mask in the target value a byte at a time (we don't have an alignment 179 // guarantee for the target address, so this is safest). 
180 uint8_t *p = (uint8_t*)Address; 181 for (unsigned i = 0; i < Size; ++i) { 182 *p++ = (uint8_t)Value; 183 Value >>= 8; 184 } 185 return false; 186 } 187 case macho::RIT_X86_64_Signed: 188 case macho::RIT_X86_64_GOTLoad: 189 case macho::RIT_X86_64_GOT: 190 case macho::RIT_X86_64_Subtractor: 191 case macho::RIT_X86_64_Signed1: 192 case macho::RIT_X86_64_Signed2: 193 case macho::RIT_X86_64_Signed4: 194 case macho::RIT_X86_64_TLV: 195 return Error("Relocation type not implemented yet!"); 196 } 197 return false; 198} 199 200bool RuntimeDyldImpl::resolveARMRelocation(uintptr_t Address, uintptr_t Value, 201 bool isPCRel, unsigned Type, 202 unsigned Size) { 203 // If the relocation is PC-relative, the value to be encoded is the 204 // pointer difference. 205 if (isPCRel) { 206 Value -= Address; 207 // ARM PCRel relocations have an effective-PC offset of two instructions 208 // (four bytes in Thumb mode, 8 bytes in ARM mode). 209 // FIXME: For now, assume ARM mode. 210 Value -= 8; 211 } 212 213 switch(Type) { 214 default: 215 llvm_unreachable("Invalid relocation type!"); 216 case macho::RIT_Vanilla: { 217 llvm_unreachable("Invalid relocation type!"); 218 // Mask in the target value a byte at a time (we don't have an alignment 219 // guarantee for the target address, so this is safest). 220 uint8_t *p = (uint8_t*)Address; 221 for (unsigned i = 0; i < Size; ++i) { 222 *p++ = (uint8_t)Value; 223 Value >>= 8; 224 } 225 break; 226 } 227 case macho::RIT_ARM_Branch24Bit: { 228 // Mask the value into the target address. We know instructions are 229 // 32-bit aligned, so we can do it all at once. 230 uint32_t *p = (uint32_t*)Address; 231 // The low two bits of the value are not encoded. 232 Value >>= 2; 233 // Mask the value to 24 bits. 234 Value &= 0xffffff; 235 // FIXME: If the destination is a Thumb function (and the instruction 236 // is a non-predicated BL instruction), we need to change it to a BLX 237 // instruction instead. 
238 239 // Insert the value into the instruction. 240 *p = (*p & ~0xffffff) | Value; 241 break; 242 } 243 case macho::RIT_ARM_ThumbBranch22Bit: 244 case macho::RIT_ARM_ThumbBranch32Bit: 245 case macho::RIT_ARM_Half: 246 case macho::RIT_ARM_HalfDifference: 247 case macho::RIT_Pair: 248 case macho::RIT_Difference: 249 case macho::RIT_ARM_LocalDifference: 250 case macho::RIT_ARM_PreboundLazyPointer: 251 return Error("Relocation type not implemented yet!"); 252 } 253 return false; 254} 255 256bool RuntimeDyldImpl:: 257loadSegment32(const MachOObject *Obj, 258 const MachOObject::LoadCommandInfo *SegmentLCI, 259 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) { 260 InMemoryStruct<macho::SegmentLoadCommand> SegmentLC; 261 Obj->ReadSegmentLoadCommand(*SegmentLCI, SegmentLC); 262 if (!SegmentLC) 263 return Error("unable to load segment load command"); 264 265 for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) { 266 InMemoryStruct<macho::Section> Sect; 267 Obj->ReadSection(*SegmentLCI, SectNum, Sect); 268 if (!Sect) 269 return Error("unable to load section: '" + Twine(SectNum) + "'"); 270 271 // FIXME: Improve check. 272 if (Sect->Flags != 0x80000400) 273 return Error("unsupported section type!"); 274 275 // Address and names of symbols in the section. 276 typedef std::pair<uint64_t, StringRef> SymbolEntry; 277 SmallVector<SymbolEntry, 64> Symbols; 278 // Index of all the names, in this section or not. Used when we're 279 // dealing with relocation entries. 280 SmallVector<StringRef, 64> SymbolNames; 281 for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) { 282 InMemoryStruct<macho::SymbolTableEntry> STE; 283 Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE); 284 if (!STE) 285 return Error("unable to read symbol: '" + Twine(i) + "'"); 286 if (STE->SectionIndex > SegmentLC->NumSections) 287 return Error("invalid section index for symbol: '" + Twine(i) + "'"); 288 // Get the symbol name. 
289 StringRef Name = Obj->getStringAtIndex(STE->StringIndex); 290 SymbolNames.push_back(Name); 291 292 // Just skip symbols not defined in this section. 293 if ((unsigned)STE->SectionIndex - 1 != SectNum) 294 continue; 295 296 // FIXME: Check the symbol type and flags. 297 if (STE->Type != 0xF) // external, defined in this section. 298 return Error("unexpected symbol type!"); 299 // Flags == 0x8 marks a thumb function for ARM, which is fine as it 300 // doesn't require any special handling here. 301 if (STE->Flags != 0x0 && STE->Flags != 0x8) 302 return Error("unexpected symbol type!"); 303 304 // Remember the symbol. 305 Symbols.push_back(SymbolEntry(STE->Value, Name)); 306 307 DEBUG(dbgs() << "Function sym: '" << Name << "' @ " << 308 (Sect->Address + STE->Value) << "\n"); 309 } 310 // Sort the symbols by address, just in case they didn't come in that way. 311 array_pod_sort(Symbols.begin(), Symbols.end()); 312 313 // Extract the function data. 314 uint8_t *Base = (uint8_t*)Obj->getData(SegmentLC->FileOffset, 315 SegmentLC->FileSize).data(); 316 for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) { 317 uint64_t StartOffset = Sect->Address + Symbols[i].first; 318 uint64_t EndOffset = Symbols[i + 1].first - 1; 319 DEBUG(dbgs() << "Extracting function: " << Symbols[i].second 320 << " from [" << StartOffset << ", " << EndOffset << "]\n"); 321 extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset); 322 } 323 // The last symbol we do after since the end address is calculated 324 // differently because there is no next symbol to reference. 
325 uint64_t StartOffset = Symbols[Symbols.size() - 1].first; 326 uint64_t EndOffset = Sect->Size - 1; 327 DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second 328 << " from [" << StartOffset << ", " << EndOffset << "]\n"); 329 extractFunction(Symbols[Symbols.size()-1].second, 330 Base + StartOffset, Base + EndOffset); 331 332 // Now extract the relocation information for each function and process it. 333 for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) { 334 InMemoryStruct<macho::RelocationEntry> RE; 335 Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE); 336 if (RE->Word0 & macho::RF_Scattered) 337 return Error("NOT YET IMPLEMENTED: scattered relocations."); 338 // Word0 of the relocation is the offset into the section where the 339 // relocation should be applied. We need to translate that into an 340 // offset into a function since that's our atom. 341 uint32_t Offset = RE->Word0; 342 // Look for the function containing the address. This is used for JIT 343 // code, so the number of functions in section is almost always going 344 // to be very small (usually just one), so until we have use cases 345 // where that's not true, just use a trivial linear search. 346 unsigned SymbolNum; 347 unsigned NumSymbols = Symbols.size(); 348 assert(NumSymbols > 0 && Symbols[0].first <= Offset && 349 "No symbol containing relocation!"); 350 for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum) 351 if (Symbols[SymbolNum + 1].first > Offset) 352 break; 353 // Adjust the offset to be relative to the symbol. 354 Offset -= Symbols[SymbolNum].first; 355 // Get the name of the symbol containing the relocation. 356 StringRef TargetName = SymbolNames[SymbolNum]; 357 358 bool isExtern = (RE->Word1 >> 27) & 1; 359 // Figure out the source symbol of the relocation. 
If isExtern is true, 360 // this relocation references the symbol table, otherwise it references 361 // a section in the same object, numbered from 1 through NumSections 362 // (SectionBases is [0, NumSections-1]). 363 // FIXME: Some targets (ARM) use internal relocations even for 364 // externally visible symbols, if the definition is in the same 365 // file as the reference. We need to convert those back to by-name 366 // references. We can resolve the address based on the section 367 // offset and see if we have a symbol at that address. If we do, 368 // use that; otherwise, puke. 369 if (!isExtern) 370 return Error("Internal relocations not supported."); 371 uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value 372 StringRef SourceName = SymbolNames[SourceNum]; 373 374 // FIXME: Get the relocation addend from the target address. 375 376 // Now store the relocation information. Associate it with the source 377 // symbol. 378 Relocations[SourceName].push_back(RelocationEntry(TargetName, 379 Offset, 380 RE->Word1, 381 0 /*Addend*/)); 382 DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset 383 << " from '" << SourceName << "(Word1: " 384 << format("0x%x", RE->Word1) << ")\n"); 385 } 386 } 387 return false; 388} 389 390 391bool RuntimeDyldImpl:: 392loadSegment64(const MachOObject *Obj, 393 const MachOObject::LoadCommandInfo *SegmentLCI, 394 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) { 395 InMemoryStruct<macho::Segment64LoadCommand> Segment64LC; 396 Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC); 397 if (!Segment64LC) 398 return Error("unable to load segment load command"); 399 400 for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) { 401 InMemoryStruct<macho::Section64> Sect; 402 Obj->ReadSection64(*SegmentLCI, SectNum, Sect); 403 if (!Sect) 404 return Error("unable to load section: '" + Twine(SectNum) + "'"); 405 406 // FIXME: Improve check. 
407 if (Sect->Flags != 0x80000400) 408 return Error("unsupported section type!"); 409 410 // Address and names of symbols in the section. 411 typedef std::pair<uint64_t, StringRef> SymbolEntry; 412 SmallVector<SymbolEntry, 64> Symbols; 413 // Index of all the names, in this section or not. Used when we're 414 // dealing with relocation entries. 415 SmallVector<StringRef, 64> SymbolNames; 416 for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) { 417 InMemoryStruct<macho::Symbol64TableEntry> STE; 418 Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE); 419 if (!STE) 420 return Error("unable to read symbol: '" + Twine(i) + "'"); 421 if (STE->SectionIndex > Segment64LC->NumSections) 422 return Error("invalid section index for symbol: '" + Twine(i) + "'"); 423 // Get the symbol name. 424 StringRef Name = Obj->getStringAtIndex(STE->StringIndex); 425 SymbolNames.push_back(Name); 426 427 // Just skip symbols not defined in this section. 428 if ((unsigned)STE->SectionIndex - 1 != SectNum) 429 continue; 430 431 // FIXME: Check the symbol type and flags. 432 if (STE->Type != 0xF) // external, defined in this section. 433 return Error("unexpected symbol type!"); 434 if (STE->Flags != 0x0) 435 return Error("unexpected symbol type!"); 436 437 // Remember the symbol. 438 Symbols.push_back(SymbolEntry(STE->Value, Name)); 439 440 DEBUG(dbgs() << "Function sym: '" << Name << "' @ " << 441 (Sect->Address + STE->Value) << "\n"); 442 } 443 // Sort the symbols by address, just in case they didn't come in that way. 444 array_pod_sort(Symbols.begin(), Symbols.end()); 445 446 // Extract the function data. 
447 uint8_t *Base = (uint8_t*)Obj->getData(Segment64LC->FileOffset, 448 Segment64LC->FileSize).data(); 449 for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) { 450 uint64_t StartOffset = Sect->Address + Symbols[i].first; 451 uint64_t EndOffset = Symbols[i + 1].first - 1; 452 DEBUG(dbgs() << "Extracting function: " << Symbols[i].second 453 << " from [" << StartOffset << ", " << EndOffset << "]\n"); 454 extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset); 455 } 456 // The last symbol we do after since the end address is calculated 457 // differently because there is no next symbol to reference. 458 uint64_t StartOffset = Symbols[Symbols.size() - 1].first; 459 uint64_t EndOffset = Sect->Size - 1; 460 DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second 461 << " from [" << StartOffset << ", " << EndOffset << "]\n"); 462 extractFunction(Symbols[Symbols.size()-1].second, 463 Base + StartOffset, Base + EndOffset); 464 465 // Now extract the relocation information for each function and process it. 466 for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) { 467 InMemoryStruct<macho::RelocationEntry> RE; 468 Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE); 469 if (RE->Word0 & macho::RF_Scattered) 470 return Error("NOT YET IMPLEMENTED: scattered relocations."); 471 // Word0 of the relocation is the offset into the section where the 472 // relocation should be applied. We need to translate that into an 473 // offset into a function since that's our atom. 474 uint32_t Offset = RE->Word0; 475 // Look for the function containing the address. This is used for JIT 476 // code, so the number of functions in section is almost always going 477 // to be very small (usually just one), so until we have use cases 478 // where that's not true, just use a trivial linear search. 
479 unsigned SymbolNum; 480 unsigned NumSymbols = Symbols.size(); 481 assert(NumSymbols > 0 && Symbols[0].first <= Offset && 482 "No symbol containing relocation!"); 483 for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum) 484 if (Symbols[SymbolNum + 1].first > Offset) 485 break; 486 // Adjust the offset to be relative to the symbol. 487 Offset -= Symbols[SymbolNum].first; 488 // Get the name of the symbol containing the relocation. 489 StringRef TargetName = SymbolNames[SymbolNum]; 490 491 bool isExtern = (RE->Word1 >> 27) & 1; 492 // Figure out the source symbol of the relocation. If isExtern is true, 493 // this relocation references the symbol table, otherwise it references 494 // a section in the same object, numbered from 1 through NumSections 495 // (SectionBases is [0, NumSections-1]). 496 if (!isExtern) 497 return Error("Internal relocations not supported."); 498 uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value 499 StringRef SourceName = SymbolNames[SourceNum]; 500 501 // FIXME: Get the relocation addend from the target address. 502 503 // Now store the relocation information. Associate it with the source 504 // symbol. 505 Relocations[SourceName].push_back(RelocationEntry(TargetName, 506 Offset, 507 RE->Word1, 508 0 /*Addend*/)); 509 DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset 510 << " from '" << SourceName << "(Word1: " 511 << format("0x%x", RE->Word1) << ")\n"); 512 } 513 } 514 return false; 515} 516 517bool RuntimeDyldImpl::loadObject(MemoryBuffer *InputBuffer) { 518 // If the linker is in an error state, don't do anything. 519 if (hasError()) 520 return true; 521 // Load the Mach-O wrapper object. 522 std::string ErrorStr; 523 OwningPtr<MachOObject> Obj( 524 MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr)); 525 if (!Obj) 526 return Error("unable to load object: '" + ErrorStr + "'"); 527 528 // Get the CPU type information from the header. 
529 const macho::Header &Header = Obj->getHeader(); 530 531 // FIXME: Error checking that the loaded object is compatible with 532 // the system we're running on. 533 CPUType = Header.CPUType; 534 CPUSubtype = Header.CPUSubtype; 535 536 // Validate that the load commands match what we expect. 537 const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0, 538 *DysymtabLCI = 0; 539 for (unsigned i = 0; i != Header.NumLoadCommands; ++i) { 540 const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i); 541 switch (LCI.Command.Type) { 542 case macho::LCT_Segment: 543 case macho::LCT_Segment64: 544 if (SegmentLCI) 545 return Error("unexpected input object (multiple segments)"); 546 SegmentLCI = &LCI; 547 break; 548 case macho::LCT_Symtab: 549 if (SymtabLCI) 550 return Error("unexpected input object (multiple symbol tables)"); 551 SymtabLCI = &LCI; 552 break; 553 case macho::LCT_Dysymtab: 554 if (DysymtabLCI) 555 return Error("unexpected input object (multiple symbol tables)"); 556 DysymtabLCI = &LCI; 557 break; 558 default: 559 return Error("unexpected input object (unexpected load command"); 560 } 561 } 562 563 if (!SymtabLCI) 564 return Error("no symbol table found in object"); 565 if (!SegmentLCI) 566 return Error("no symbol table found in object"); 567 568 // Read and register the symbol table data. 569 InMemoryStruct<macho::SymtabLoadCommand> SymtabLC; 570 Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC); 571 if (!SymtabLC) 572 return Error("unable to load symbol table load command"); 573 Obj->RegisterStringTable(*SymtabLC); 574 575 // Read the dynamic link-edit information, if present (not present in static 576 // objects). 577 if (DysymtabLCI) { 578 InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC; 579 Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC); 580 if (!DysymtabLC) 581 return Error("unable to load dynamic link-exit load command"); 582 583 // FIXME: We don't support anything interesting yet. 
584// if (DysymtabLC->LocalSymbolsIndex != 0) 585// return Error("NOT YET IMPLEMENTED: local symbol entries"); 586// if (DysymtabLC->ExternalSymbolsIndex != 0) 587// return Error("NOT YET IMPLEMENTED: non-external symbol entries"); 588// if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries) 589// return Error("NOT YET IMPLEMENTED: undefined symbol entries"); 590 } 591 592 // Load the segment load command. 593 if (SegmentLCI->Command.Type == macho::LCT_Segment) { 594 if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC)) 595 return true; 596 } else { 597 if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC)) 598 return true; 599 } 600 601 return false; 602} 603 604// Resolve the relocations for all symbols we currently know about. 605void RuntimeDyldImpl::resolveRelocations() { 606 // Just iterate over the symbols in our symbol table and assign their 607 // addresses. 608 StringMap<uint8_t*>::iterator i = SymbolTable.begin(); 609 StringMap<uint8_t*>::iterator e = SymbolTable.end(); 610 for (;i != e; ++i) 611 reassignSymbolAddress(i->getKey(), i->getValue()); 612} 613 614// Assign an address to a symbol name and resolve all the relocations 615// associated with it. 616void RuntimeDyldImpl::reassignSymbolAddress(StringRef Name, uint8_t *Addr) { 617 // Assign the address in our symbol table. 618 SymbolTable[Name] = Addr; 619 620 RelocationList &Relocs = Relocations[Name]; 621 for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { 622 RelocationEntry &RE = Relocs[i]; 623 uint8_t *Target = SymbolTable[RE.Target] + RE.Offset; 624 bool isPCRel = (RE.Data >> 24) & 1; 625 unsigned Type = (RE.Data >> 28) & 0xf; 626 unsigned Size = 1 << ((RE.Data >> 25) & 3); 627 628 DEBUG(dbgs() << "Resolving relocation at '" << RE.Target 629 << "' + " << RE.Offset << " (" << format("%p", Target) << ")" 630 << " from '" << Name << " (" << format("%p", Addr) << ")" 631 << "(" << (isPCRel ? 
"pcrel" : "absolute") 632 << ", type: " << Type << ", Size: " << Size << ").\n"); 633 634 resolveRelocation(Target, Addr, isPCRel, Type, Size); 635 RE.isResolved = true; 636 } 637} 638 639//===----------------------------------------------------------------------===// 640// RuntimeDyld class implementation 641RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *MM) { 642 Dyld = new RuntimeDyldImpl(MM); 643} 644 645RuntimeDyld::~RuntimeDyld() { 646 delete Dyld; 647} 648 649bool RuntimeDyld::loadObject(MemoryBuffer *InputBuffer) { 650 return Dyld->loadObject(InputBuffer); 651} 652 653void *RuntimeDyld::getSymbolAddress(StringRef Name) { 654 return Dyld->getSymbolAddress(Name); 655} 656 657void RuntimeDyld::resolveRelocations() { 658 Dyld->resolveRelocations(); 659} 660 661void RuntimeDyld::reassignSymbolAddress(StringRef Name, uint8_t *Addr) { 662 Dyld->reassignSymbolAddress(Name, Addr); 663} 664 665StringRef RuntimeDyld::getErrorString() { 666 return Dyld->getErrorString(); 667} 668 669} // end namespace llvm 670