MachObjectWriter.cpp revision 221345
1//===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "llvm/MC/MCMachObjectWriter.h" 11#include "llvm/ADT/OwningPtr.h" 12#include "llvm/ADT/StringMap.h" 13#include "llvm/ADT/Twine.h" 14#include "llvm/MC/MCAssembler.h" 15#include "llvm/MC/MCAsmLayout.h" 16#include "llvm/MC/MCExpr.h" 17#include "llvm/MC/MCObjectWriter.h" 18#include "llvm/MC/MCSectionMachO.h" 19#include "llvm/MC/MCSymbol.h" 20#include "llvm/MC/MCMachOSymbolFlags.h" 21#include "llvm/MC/MCValue.h" 22#include "llvm/Object/MachOFormat.h" 23#include "llvm/Support/ErrorHandling.h" 24#include "llvm/Target/TargetAsmBackend.h" 25 26// FIXME: Gross. 27#include "../Target/ARM/ARMFixupKinds.h" 28#include "../Target/X86/X86FixupKinds.h" 29 30#include <vector> 31using namespace llvm; 32using namespace llvm::object; 33 34// FIXME: this has been copied from (or to) X86AsmBackend.cpp 35static unsigned getFixupKindLog2Size(unsigned Kind) { 36 switch (Kind) { 37 default: 38 llvm_unreachable("invalid fixup kind!"); 39 case FK_PCRel_1: 40 case FK_Data_1: return 0; 41 case FK_PCRel_2: 42 case FK_Data_2: return 1; 43 case FK_PCRel_4: 44 // FIXME: Remove these!!! 45 case X86::reloc_riprel_4byte: 46 case X86::reloc_riprel_4byte_movq_load: 47 case X86::reloc_signed_4byte: 48 case FK_Data_4: return 2; 49 case FK_Data_8: return 3; 50 } 51} 52 53static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) { 54 // Undefined symbols are always extern. 55 if (SD->Symbol->isUndefined()) 56 return true; 57 58 // References to weak definitions require external relocation entries; the 59 // definition may not always be the one in the same object file. 60 if (SD->getFlags() & SF_WeakDefinition) 61 return true; 62 63 // Otherwise, we can use an internal relocation. 64 return false; 65} 66 67namespace { 68 69class MachObjectWriter : public MCObjectWriter { 70 /// MachSymbolData - Helper struct for containing some precomputed information 71 /// on symbols. 72 struct MachSymbolData { 73 MCSymbolData *SymbolData; 74 uint64_t StringIndex; 75 uint8_t SectionIndex; 76 77 // Support lexicographic sorting. 78 bool operator<(const MachSymbolData &RHS) const { 79 return SymbolData->getSymbol().getName() < 80 RHS.SymbolData->getSymbol().getName(); 81 } 82 }; 83 84 /// The target specific Mach-O writer instance. 85 llvm::OwningPtr<MCMachObjectTargetWriter> TargetObjectWriter; 86 87 /// @name Relocation Data 88 /// @{ 89 90 llvm::DenseMap<const MCSectionData*, 91 std::vector<macho::RelocationEntry> > Relocations; 92 llvm::DenseMap<const MCSectionData*, unsigned> IndirectSymBase; 93 94 /// @} 95 /// @name Symbol Table Data 96 /// @{ 97 98 SmallString<256> StringTable; 99 std::vector<MachSymbolData> LocalSymbolData; 100 std::vector<MachSymbolData> ExternalSymbolData; 101 std::vector<MachSymbolData> UndefinedSymbolData; 102 103 /// @} 104 105private: 106 /// @name Utility Methods 107 /// @{ 108 109 bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) { 110 const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo( 111 (MCFixupKind) Kind); 112 113 return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel; 114 } 115 116 /// @} 117 118 SectionAddrMap SectionAddress; 119 uint64_t getSectionAddress(const MCSectionData* SD) const { 120 return SectionAddress.lookup(SD); 121 } 122 uint64_t getSymbolAddress(const MCSymbolData* SD, 123 const MCAsmLayout &Layout) const { 124 const MCSymbol &S = SD->getSymbol(); 125 126 // If this is a variable, then recursively evaluate now. 127 if (S.isVariable()) { 128 MCValue Target; 129 if (!S.getVariableValue()->EvaluateAsRelocatable(Target, Layout)) 130 report_fatal_error("unable to evaluate offset for variable '" + 131 S.getName() + "'"); 132 133 // Verify that any used symbols are defined. 134 if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined()) 135 report_fatal_error("unable to evaluate offset to undefined symbol '" + 136 Target.getSymA()->getSymbol().getName() + "'"); 137 if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined()) 138 report_fatal_error("unable to evaluate offset to undefined symbol '" + 139 Target.getSymB()->getSymbol().getName() + "'"); 140 141 uint64_t Address = Target.getConstant(); 142 if (Target.getSymA()) 143 Address += getSymbolAddress(&Layout.getAssembler().getSymbolData( 144 Target.getSymA()->getSymbol()), Layout); 145 if (Target.getSymB()) 146 Address += getSymbolAddress(&Layout.getAssembler().getSymbolData( 147 Target.getSymB()->getSymbol()), Layout); 148 return Address; 149 } 150 151 return getSectionAddress(SD->getFragment()->getParent()) + 152 Layout.getSymbolOffset(SD); 153 } 154 uint64_t getFragmentAddress(const MCFragment *Fragment, 155 const MCAsmLayout &Layout) const { 156 return getSectionAddress(Fragment->getParent()) + 157 Layout.getFragmentOffset(Fragment); 158 } 159 160 uint64_t getPaddingSize(const MCSectionData *SD, 161 const MCAsmLayout &Layout) const { 162 uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD); 163 unsigned Next = SD->getLayoutOrder() + 1; 164 if (Next >= Layout.getSectionOrder().size()) 165 return 0; 166 167 const MCSectionData &NextSD = *Layout.getSectionOrder()[Next]; 168 if (NextSD.getSection().isVirtualSection()) 169 return 0; 170 return OffsetToAlignment(EndAddr, NextSD.getAlignment()); 171 } 172 173public: 174 MachObjectWriter(MCMachObjectTargetWriter *MOTW, raw_ostream &_OS, 175 bool _IsLittleEndian) 176 : MCObjectWriter(_OS, _IsLittleEndian), TargetObjectWriter(MOTW) { 177 } 178 179 /// @name Target Writer Proxy Accessors 180 /// @{ 181 182 bool is64Bit() const { return TargetObjectWriter->is64Bit(); } 183 bool isARM() const { 184 uint32_t CPUType = TargetObjectWriter->getCPUType() & ~mach::CTFM_ArchMask; 185 return CPUType == mach::CTM_ARM; 186 } 187 188 /// @} 189 190 void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize, 191 bool SubsectionsViaSymbols) { 192 uint32_t Flags = 0; 193 194 if (SubsectionsViaSymbols) 195 Flags |= macho::HF_SubsectionsViaSymbols; 196 197 // struct mach_header (28 bytes) or 198 // struct mach_header_64 (32 bytes) 199 200 uint64_t Start = OS.tell(); 201 (void) Start; 202 203 Write32(is64Bit() ? macho::HM_Object64 : macho::HM_Object32); 204 205 Write32(TargetObjectWriter->getCPUType()); 206 Write32(TargetObjectWriter->getCPUSubtype()); 207 208 Write32(macho::HFT_Object); 209 Write32(NumLoadCommands); 210 Write32(LoadCommandsSize); 211 Write32(Flags); 212 if (is64Bit()) 213 Write32(0); // reserved 214 215 assert(OS.tell() - Start == 216 (is64Bit() ? macho::Header64Size : macho::Header32Size)); 217 } 218 219 /// WriteSegmentLoadCommand - Write a segment load command. 220 /// 221 /// \arg NumSections - The number of sections in this segment. 222 /// \arg SectionDataSize - The total size of the sections. 223 void WriteSegmentLoadCommand(unsigned NumSections, 224 uint64_t VMSize, 225 uint64_t SectionDataStartOffset, 226 uint64_t SectionDataSize) { 227 // struct segment_command (56 bytes) or 228 // struct segment_command_64 (72 bytes) 229 230 uint64_t Start = OS.tell(); 231 (void) Start; 232 233 unsigned SegmentLoadCommandSize = 234 is64Bit() ? macho::SegmentLoadCommand64Size: 235 macho::SegmentLoadCommand32Size; 236 Write32(is64Bit() ? macho::LCT_Segment64 : macho::LCT_Segment); 237 Write32(SegmentLoadCommandSize + 238 NumSections * (is64Bit() ? macho::Section64Size : 239 macho::Section32Size)); 240 241 WriteBytes("", 16); 242 if (is64Bit()) { 243 Write64(0); // vmaddr 244 Write64(VMSize); // vmsize 245 Write64(SectionDataStartOffset); // file offset 246 Write64(SectionDataSize); // file size 247 } else { 248 Write32(0); // vmaddr 249 Write32(VMSize); // vmsize 250 Write32(SectionDataStartOffset); // file offset 251 Write32(SectionDataSize); // file size 252 } 253 Write32(0x7); // maxprot 254 Write32(0x7); // initprot 255 Write32(NumSections); 256 Write32(0); // flags 257 258 assert(OS.tell() - Start == SegmentLoadCommandSize); 259 } 260 261 void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout, 262 const MCSectionData &SD, uint64_t FileOffset, 263 uint64_t RelocationsStart, unsigned NumRelocations) { 264 uint64_t SectionSize = Layout.getSectionAddressSize(&SD); 265 266 // The offset is unused for virtual sections. 267 if (SD.getSection().isVirtualSection()) { 268 assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!"); 269 FileOffset = 0; 270 } 271 272 // struct section (68 bytes) or 273 // struct section_64 (80 bytes) 274 275 uint64_t Start = OS.tell(); 276 (void) Start; 277 278 const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection()); 279 WriteBytes(Section.getSectionName(), 16); 280 WriteBytes(Section.getSegmentName(), 16); 281 if (is64Bit()) { 282 Write64(getSectionAddress(&SD)); // address 283 Write64(SectionSize); // size 284 } else { 285 Write32(getSectionAddress(&SD)); // address 286 Write32(SectionSize); // size 287 } 288 Write32(FileOffset); 289 290 unsigned Flags = Section.getTypeAndAttributes(); 291 if (SD.hasInstructions()) 292 Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS; 293 294 assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!"); 295 Write32(Log2_32(SD.getAlignment())); 296 Write32(NumRelocations ? RelocationsStart : 0); 297 Write32(NumRelocations); 298 Write32(Flags); 299 Write32(IndirectSymBase.lookup(&SD)); // reserved1 300 Write32(Section.getStubSize()); // reserved2 301 if (is64Bit()) 302 Write32(0); // reserved3 303 304 assert(OS.tell() - Start == (is64Bit() ? macho::Section64Size : 305 macho::Section32Size)); 306 } 307 308 void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols, 309 uint32_t StringTableOffset, 310 uint32_t StringTableSize) { 311 // struct symtab_command (24 bytes) 312 313 uint64_t Start = OS.tell(); 314 (void) Start; 315 316 Write32(macho::LCT_Symtab); 317 Write32(macho::SymtabLoadCommandSize); 318 Write32(SymbolOffset); 319 Write32(NumSymbols); 320 Write32(StringTableOffset); 321 Write32(StringTableSize); 322 323 assert(OS.tell() - Start == macho::SymtabLoadCommandSize); 324 } 325 326 void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, 327 uint32_t NumLocalSymbols, 328 uint32_t FirstExternalSymbol, 329 uint32_t NumExternalSymbols, 330 uint32_t FirstUndefinedSymbol, 331 uint32_t NumUndefinedSymbols, 332 uint32_t IndirectSymbolOffset, 333 uint32_t NumIndirectSymbols) { 334 // struct dysymtab_command (80 bytes) 335 336 uint64_t Start = OS.tell(); 337 (void) Start; 338 339 Write32(macho::LCT_Dysymtab); 340 Write32(macho::DysymtabLoadCommandSize); 341 Write32(FirstLocalSymbol); 342 Write32(NumLocalSymbols); 343 Write32(FirstExternalSymbol); 344 Write32(NumExternalSymbols); 345 Write32(FirstUndefinedSymbol); 346 Write32(NumUndefinedSymbols); 347 Write32(0); // tocoff 348 Write32(0); // ntoc 349 Write32(0); // modtaboff 350 Write32(0); // nmodtab 351 Write32(0); // extrefsymoff 352 Write32(0); // nextrefsyms 353 Write32(IndirectSymbolOffset); 354 Write32(NumIndirectSymbols); 355 Write32(0); // extreloff 356 Write32(0); // nextrel 357 Write32(0); // locreloff 358 Write32(0); // nlocrel 359 360 assert(OS.tell() - Start == macho::DysymtabLoadCommandSize); 361 } 362 363 void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout) { 364 MCSymbolData &Data = *MSD.SymbolData; 365 const MCSymbol &Symbol = Data.getSymbol(); 366 uint8_t Type = 0; 367 uint16_t Flags = Data.getFlags(); 368 uint32_t Address = 0; 369 370 // Set the N_TYPE bits. See <mach-o/nlist.h>. 371 // 372 // FIXME: Are the prebound or indirect fields possible here? 373 if (Symbol.isUndefined()) 374 Type = macho::STT_Undefined; 375 else if (Symbol.isAbsolute()) 376 Type = macho::STT_Absolute; 377 else 378 Type = macho::STT_Section; 379 380 // FIXME: Set STAB bits. 381 382 if (Data.isPrivateExtern()) 383 Type |= macho::STF_PrivateExtern; 384 385 // Set external bit. 386 if (Data.isExternal() || Symbol.isUndefined()) 387 Type |= macho::STF_External; 388 389 // Compute the symbol address. 390 if (Symbol.isDefined()) { 391 if (Symbol.isAbsolute()) { 392 Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue(); 393 } else { 394 Address = getSymbolAddress(&Data, Layout); 395 } 396 } else if (Data.isCommon()) { 397 // Common symbols are encoded with the size in the address 398 // field, and their alignment in the flags. 399 Address = Data.getCommonSize(); 400 401 // Common alignment is packed into the 'desc' bits. 402 if (unsigned Align = Data.getCommonAlignment()) { 403 unsigned Log2Size = Log2_32(Align); 404 assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); 405 if (Log2Size > 15) 406 report_fatal_error("invalid 'common' alignment '" + 407 Twine(Align) + "'"); 408 // FIXME: Keep this mask with the SymbolFlags enumeration. 409 Flags = (Flags & 0xF0FF) | (Log2Size << 8); 410 } 411 } 412 413 // struct nlist (12 bytes) 414 415 Write32(MSD.StringIndex); 416 Write8(Type); 417 Write8(MSD.SectionIndex); 418 419 // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' 420 // value. 421 Write16(Flags); 422 if (is64Bit()) 423 Write64(Address); 424 else 425 Write32(Address); 426 } 427 428 // FIXME: We really need to improve the relocation validation. Basically, we 429 // want to implement a separate computation which evaluates the relocation 430 // entry as the linker would, and verifies that the resultant fixup value is 431 // exactly what the encoder wanted. This will catch several classes of 432 // problems: 433 // 434 // - Relocation entry bugs, the two algorithms are unlikely to have the same 435 // exact bug. 436 // 437 // - Relaxation issues, where we forget to relax something. 438 // 439 // - Input errors, where something cannot be correctly encoded. 'as' allows 440 // these through in many cases. 441 442 static bool isFixupKindRIPRel(unsigned Kind) { 443 return Kind == X86::reloc_riprel_4byte || 444 Kind == X86::reloc_riprel_4byte_movq_load; 445 } 446 void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout, 447 const MCFragment *Fragment, 448 const MCFixup &Fixup, MCValue Target, 449 uint64_t &FixedValue) { 450 unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); 451 unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind()); 452 unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); 453 454 // See <reloc.h>. 455 uint32_t FixupOffset = 456 Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); 457 uint32_t FixupAddress = 458 getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); 459 int64_t Value = 0; 460 unsigned Index = 0; 461 unsigned IsExtern = 0; 462 unsigned Type = 0; 463 464 Value = Target.getConstant(); 465 466 if (IsPCRel) { 467 // Compensate for the relocation offset, Darwin x86_64 relocations only 468 // have the addend and appear to have attempted to define it to be the 469 // actual expression addend without the PCrel bias. However, instructions 470 // with data following the relocation are not accommodated for (see comment 471 // below regarding SIGNED{1,2,4}), so it isn't exactly that either. 472 Value += 1LL << Log2Size; 473 } 474 475 if (Target.isAbsolute()) { // constant 476 // SymbolNum of 0 indicates the absolute section. 477 Type = macho::RIT_X86_64_Unsigned; 478 Index = 0; 479 480 // FIXME: I believe this is broken, I don't think the linker can 481 // understand it. I think it would require a local relocation, but I'm not 482 // sure if that would work either. The official way to get an absolute 483 // PCrel relocation is to use an absolute symbol (which we don't support 484 // yet). 485 if (IsPCRel) { 486 IsExtern = 1; 487 Type = macho::RIT_X86_64_Branch; 488 } 489 } else if (Target.getSymB()) { // A - B + constant 490 const MCSymbol *A = &Target.getSymA()->getSymbol(); 491 MCSymbolData &A_SD = Asm.getSymbolData(*A); 492 const MCSymbolData *A_Base = Asm.getAtom(&A_SD); 493 494 const MCSymbol *B = &Target.getSymB()->getSymbol(); 495 MCSymbolData &B_SD = Asm.getSymbolData(*B); 496 const MCSymbolData *B_Base = Asm.getAtom(&B_SD); 497 498 // Neither symbol can be modified. 499 if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || 500 Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) 501 report_fatal_error("unsupported relocation of modified symbol"); 502 503 // We don't support PCrel relocations of differences. Darwin 'as' doesn't 504 // implement most of these correctly. 505 if (IsPCRel) 506 report_fatal_error("unsupported pc-relative relocation of difference"); 507 508 // The support for the situation where one or both of the symbols would 509 // require a local relocation is handled just like if the symbols were 510 // external. This is certainly used in the case of debug sections where 511 // the section has only temporary symbols and thus the symbols don't have 512 // base symbols. This is encoded using the section ordinal and 513 // non-extern relocation entries. 514 515 // Darwin 'as' doesn't emit correct relocations for this (it ends up with 516 // a single SIGNED relocation); reject it for now. Except the case where 517 // both symbols don't have a base, equal but both NULL. 518 if (A_Base == B_Base && A_Base) 519 report_fatal_error("unsupported relocation with identical base"); 520 521 Value += getSymbolAddress(&A_SD, Layout) - 522 (A_Base == NULL ? 0 : getSymbolAddress(A_Base, Layout)); 523 Value -= getSymbolAddress(&B_SD, Layout) - 524 (B_Base == NULL ? 0 : getSymbolAddress(B_Base, Layout)); 525 526 if (A_Base) { 527 Index = A_Base->getIndex(); 528 IsExtern = 1; 529 } 530 else { 531 Index = A_SD.getFragment()->getParent()->getOrdinal() + 1; 532 IsExtern = 0; 533 } 534 Type = macho::RIT_X86_64_Unsigned; 535 536 macho::RelocationEntry MRE; 537 MRE.Word0 = FixupOffset; 538 MRE.Word1 = ((Index << 0) | 539 (IsPCRel << 24) | 540 (Log2Size << 25) | 541 (IsExtern << 27) | 542 (Type << 28)); 543 Relocations[Fragment->getParent()].push_back(MRE); 544 545 if (B_Base) { 546 Index = B_Base->getIndex(); 547 IsExtern = 1; 548 } 549 else { 550 Index = B_SD.getFragment()->getParent()->getOrdinal() + 1; 551 IsExtern = 0; 552 } 553 Type = macho::RIT_X86_64_Subtractor; 554 } else { 555 const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); 556 MCSymbolData &SD = Asm.getSymbolData(*Symbol); 557 const MCSymbolData *Base = Asm.getAtom(&SD); 558 559 // Relocations inside debug sections always use local relocations when 560 // possible. This seems to be done because the debugger doesn't fully 561 // understand x86_64 relocation entries, and expects to find values that 562 // have already been fixed up. 563 if (Symbol->isInSection()) { 564 const MCSectionMachO &Section = static_cast<const MCSectionMachO&>( 565 Fragment->getParent()->getSection()); 566 if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG)) 567 Base = 0; 568 } 569 570 // x86_64 almost always uses external relocations, except when there is no 571 // symbol to use as a base address (a local symbol with no preceding 572 // non-local symbol). 573 if (Base) { 574 Index = Base->getIndex(); 575 IsExtern = 1; 576 577 // Add the local offset, if needed. 578 if (Base != &SD) 579 Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base); 580 } else if (Symbol->isInSection() && !Symbol->isVariable()) { 581 // The index is the section ordinal (1-based). 582 Index = SD.getFragment()->getParent()->getOrdinal() + 1; 583 IsExtern = 0; 584 Value += getSymbolAddress(&SD, Layout); 585 586 if (IsPCRel) 587 Value -= FixupAddress + (1 << Log2Size); 588 } else if (Symbol->isVariable()) { 589 const MCExpr *Value = Symbol->getVariableValue(); 590 int64_t Res; 591 bool isAbs = Value->EvaluateAsAbsolute(Res, Layout, SectionAddress); 592 if (isAbs) { 593 FixedValue = Res; 594 return; 595 } else { 596 report_fatal_error("unsupported relocation of variable '" + 597 Symbol->getName() + "'"); 598 } 599 } else { 600 report_fatal_error("unsupported relocation of undefined symbol '" + 601 Symbol->getName() + "'"); 602 } 603 604 MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind(); 605 if (IsPCRel) { 606 if (IsRIPRel) { 607 if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { 608 // x86_64 distinguishes movq foo@GOTPCREL so that the linker can 609 // rewrite the movq to an leaq at link time if the symbol ends up in 610 // the same linkage unit. 611 if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load) 612 Type = macho::RIT_X86_64_GOTLoad; 613 else 614 Type = macho::RIT_X86_64_GOT; 615 } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { 616 Type = macho::RIT_X86_64_TLV; 617 } else if (Modifier != MCSymbolRefExpr::VK_None) { 618 report_fatal_error("unsupported symbol modifier in relocation"); 619 } else { 620 Type = macho::RIT_X86_64_Signed; 621 622 // The Darwin x86_64 relocation format has a problem where it cannot 623 // encode an address (L<foo> + <constant>) which is outside the atom 624 // containing L<foo>. Generally, this shouldn't occur but it does 625 // happen when we have a RIPrel instruction with data following the 626 // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel 627 // adjustment Darwin x86_64 uses, the offset is still negative and 628 // the linker has no way to recognize this. 629 // 630 // To work around this, Darwin uses several special relocation types 631 // to indicate the offsets. However, the specification or 632 // implementation of these seems to also be incomplete; they should 633 // adjust the addend as well based on the actual encoded instruction 634 // (the additional bias), but instead appear to just look at the 635 // final offset. 636 switch (-(Target.getConstant() + (1LL << Log2Size))) { 637 case 1: Type = macho::RIT_X86_64_Signed1; break; 638 case 2: Type = macho::RIT_X86_64_Signed2; break; 639 case 4: Type = macho::RIT_X86_64_Signed4; break; 640 } 641 } 642 } else { 643 if (Modifier != MCSymbolRefExpr::VK_None) 644 report_fatal_error("unsupported symbol modifier in branch " 645 "relocation"); 646 647 Type = macho::RIT_X86_64_Branch; 648 } 649 } else { 650 if (Modifier == MCSymbolRefExpr::VK_GOT) { 651 Type = macho::RIT_X86_64_GOT; 652 } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { 653 // GOTPCREL is allowed as a modifier on non-PCrel instructions, in 654 // which case all we do is set the PCrel bit in the relocation entry; 655 // this is used with exception handling, for example. The source is 656 // required to include any necessary offset directly. 657 Type = macho::RIT_X86_64_GOT; 658 IsPCRel = 1; 659 } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { 660 report_fatal_error("TLVP symbol modifier should have been rip-rel"); 661 } else if (Modifier != MCSymbolRefExpr::VK_None) 662 report_fatal_error("unsupported symbol modifier in relocation"); 663 else 664 Type = macho::RIT_X86_64_Unsigned; 665 } 666 } 667 668 // x86_64 always writes custom values into the fixups. 669 FixedValue = Value; 670 671 // struct relocation_info (8 bytes) 672 macho::RelocationEntry MRE; 673 MRE.Word0 = FixupOffset; 674 MRE.Word1 = ((Index << 0) | 675 (IsPCRel << 24) | 676 (Log2Size << 25) | 677 (IsExtern << 27) | 678 (Type << 28)); 679 Relocations[Fragment->getParent()].push_back(MRE); 680 } 681 682 void RecordScatteredRelocation(const MCAssembler &Asm, 683 const MCAsmLayout &Layout, 684 const MCFragment *Fragment, 685 const MCFixup &Fixup, MCValue Target, 686 unsigned Log2Size, 687 uint64_t &FixedValue) { 688 uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); 689 unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); 690 unsigned Type = macho::RIT_Vanilla; 691 692 // See <reloc.h>. 693 const MCSymbol *A = &Target.getSymA()->getSymbol(); 694 MCSymbolData *A_SD = &Asm.getSymbolData(*A); 695 696 if (!A_SD->getFragment()) 697 report_fatal_error("symbol '" + A->getName() + 698 "' can not be undefined in a subtraction expression"); 699 700 uint32_t Value = getSymbolAddress(A_SD, Layout); 701 uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent()); 702 FixedValue += SecAddr; 703 uint32_t Value2 = 0; 704 705 if (const MCSymbolRefExpr *B = Target.getSymB()) { 706 MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); 707 708 if (!B_SD->getFragment()) 709 report_fatal_error("symbol '" + B->getSymbol().getName() + 710 "' can not be undefined in a subtraction expression"); 711 712 // Select the appropriate difference relocation type. 713 // 714 // Note that there is no longer any semantic difference between these two 715 // relocation types from the linkers point of view, this is done solely 716 // for pedantic compatibility with 'as'. 717 Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference : 718 (unsigned)macho::RIT_Generic_LocalDifference; 719 Value2 = getSymbolAddress(B_SD, Layout); 720 FixedValue -= getSectionAddress(B_SD->getFragment()->getParent()); 721 } 722 723 // Relocations are written out in reverse order, so the PAIR comes first. 724 if (Type == macho::RIT_Difference || 725 Type == macho::RIT_Generic_LocalDifference) { 726 macho::RelocationEntry MRE; 727 MRE.Word0 = ((0 << 0) | 728 (macho::RIT_Pair << 24) | 729 (Log2Size << 28) | 730 (IsPCRel << 30) | 731 macho::RF_Scattered); 732 MRE.Word1 = Value2; 733 Relocations[Fragment->getParent()].push_back(MRE); 734 } 735 736 macho::RelocationEntry MRE; 737 MRE.Word0 = ((FixupOffset << 0) | 738 (Type << 24) | 739 (Log2Size << 28) | 740 (IsPCRel << 30) | 741 macho::RF_Scattered); 742 MRE.Word1 = Value; 743 Relocations[Fragment->getParent()].push_back(MRE); 744 } 745 746 void RecordARMScatteredRelocation(const MCAssembler &Asm, 747 const MCAsmLayout &Layout, 748 const MCFragment *Fragment, 749 const MCFixup &Fixup, MCValue Target, 750 unsigned Log2Size, 751 uint64_t &FixedValue) { 752 uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); 753 unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); 754 unsigned Type = macho::RIT_Vanilla; 755 756 // See <reloc.h>. 757 const MCSymbol *A = &Target.getSymA()->getSymbol(); 758 MCSymbolData *A_SD = &Asm.getSymbolData(*A); 759 760 if (!A_SD->getFragment()) 761 report_fatal_error("symbol '" + A->getName() + 762 "' can not be undefined in a subtraction expression"); 763 764 uint32_t Value = getSymbolAddress(A_SD, Layout); 765 uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent()); 766 FixedValue += SecAddr; 767 uint32_t Value2 = 0; 768 769 if (const MCSymbolRefExpr *B = Target.getSymB()) { 770 MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); 771 772 if (!B_SD->getFragment()) 773 report_fatal_error("symbol '" + B->getSymbol().getName() + 774 "' can not be undefined in a subtraction expression"); 775 776 // Select the appropriate difference relocation type. 777 Type = macho::RIT_Difference; 778 Value2 = getSymbolAddress(B_SD, Layout); 779 FixedValue -= getSectionAddress(B_SD->getFragment()->getParent()); 780 } 781 782 // Relocations are written out in reverse order, so the PAIR comes first. 783 if (Type == macho::RIT_Difference || 784 Type == macho::RIT_Generic_LocalDifference) { 785 macho::RelocationEntry MRE; 786 MRE.Word0 = ((0 << 0) | 787 (macho::RIT_Pair << 24) | 788 (Log2Size << 28) | 789 (IsPCRel << 30) | 790 macho::RF_Scattered); 791 MRE.Word1 = Value2; 792 Relocations[Fragment->getParent()].push_back(MRE); 793 } 794 795 macho::RelocationEntry MRE; 796 MRE.Word0 = ((FixupOffset << 0) | 797 (Type << 24) | 798 (Log2Size << 28) | 799 (IsPCRel << 30) | 800 macho::RF_Scattered); 801 MRE.Word1 = Value; 802 Relocations[Fragment->getParent()].push_back(MRE); 803 } 804 805 void RecordARMMovwMovtRelocation(const MCAssembler &Asm, 806 const MCAsmLayout &Layout, 807 const MCFragment *Fragment, 808 const MCFixup &Fixup, MCValue Target, 809 uint64_t &FixedValue) { 810 uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); 811 unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); 812 unsigned Type = macho::RIT_ARM_Half; 813 814 // See <reloc.h>. 815 const MCSymbol *A = &Target.getSymA()->getSymbol(); 816 MCSymbolData *A_SD = &Asm.getSymbolData(*A); 817 818 if (!A_SD->getFragment()) 819 report_fatal_error("symbol '" + A->getName() + 820 "' can not be undefined in a subtraction expression"); 821 822 uint32_t Value = getSymbolAddress(A_SD, Layout); 823 uint32_t Value2 = 0; 824 uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent()); 825 FixedValue += SecAddr; 826 827 if (const MCSymbolRefExpr *B = Target.getSymB()) { 828 MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); 829 830 if (!B_SD->getFragment()) 831 report_fatal_error("symbol '" + B->getSymbol().getName() + 832 "' can not be undefined in a subtraction expression"); 833 834 // Select the appropriate difference relocation type. 835 Type = macho::RIT_ARM_HalfDifference; 836 Value2 = getSymbolAddress(B_SD, Layout); 837 FixedValue -= getSectionAddress(B_SD->getFragment()->getParent()); 838 } 839 840 // Relocations are written out in reverse order, so the PAIR comes first. 841 // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field: 842 // 843 // For these two r_type relocations they always have a pair following them 844 // and the r_length bits are used differently. The encoding of the 845 // r_length is as follows: 846 // low bit of r_length: 847 // 0 - :lower16: for movw instructions 848 // 1 - :upper16: for movt instructions 849 // high bit of r_length: 850 // 0 - arm instructions 851 // 1 - thumb instructions 852 // the other half of the relocated expression is in the following pair 853 // relocation entry in the the low 16 bits of r_address field. 854 unsigned ThumbBit = 0; 855 unsigned MovtBit = 0; 856 switch ((unsigned)Fixup.getKind()) { 857 default: break; 858 case ARM::fixup_arm_movt_hi16: 859 case ARM::fixup_arm_movt_hi16_pcrel: 860 MovtBit = 1; 861 break; 862 case ARM::fixup_t2_movt_hi16: 863 case ARM::fixup_t2_movt_hi16_pcrel: 864 MovtBit = 1; 865 // Fallthrough 866 case ARM::fixup_t2_movw_lo16: 867 case ARM::fixup_t2_movw_lo16_pcrel: 868 ThumbBit = 1; 869 break; 870 } 871 872 873 if (Type == macho::RIT_ARM_HalfDifference) { 874 uint32_t OtherHalf = MovtBit 875 ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16); 876 877 macho::RelocationEntry MRE; 878 MRE.Word0 = ((OtherHalf << 0) | 879 (macho::RIT_Pair << 24) | 880 (MovtBit << 28) | 881 (ThumbBit << 29) | 882 (IsPCRel << 30) | 883 macho::RF_Scattered); 884 MRE.Word1 = Value2; 885 Relocations[Fragment->getParent()].push_back(MRE); 886 } 887 888 macho::RelocationEntry MRE; 889 MRE.Word0 = ((FixupOffset << 0) | 890 (Type << 24) | 891 (MovtBit << 28) | 892 (ThumbBit << 29) | 893 (IsPCRel << 30) | 894 macho::RF_Scattered); 895 MRE.Word1 = Value; 896 Relocations[Fragment->getParent()].push_back(MRE); 897 } 898 899 void RecordTLVPRelocation(const MCAssembler &Asm, 900 const MCAsmLayout &Layout, 901 const MCFragment *Fragment, 902 const MCFixup &Fixup, MCValue Target, 903 uint64_t &FixedValue) { 904 assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP && 905 !is64Bit() && 906 "Should only be called with a 32-bit TLVP relocation!"); 907 908 unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); 909 uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); 910 unsigned IsPCRel = 0; 911 912 // Get the symbol data. 913 MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol()); 914 unsigned Index = SD_A->getIndex(); 915 916 // We're only going to have a second symbol in pic mode and it'll be a 917 // subtraction from the picbase. For 32-bit pic the addend is the difference 918 // between the picbase and the next address. For 32-bit static the addend 919 // is zero. 920 if (Target.getSymB()) { 921 // If this is a subtraction then we're pcrel. 922 uint32_t FixupAddress = 923 getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); 924 MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol()); 925 IsPCRel = 1; 926 FixedValue = (FixupAddress - getSymbolAddress(SD_B, Layout) + 927 Target.getConstant()); 928 FixedValue += 1ULL << Log2Size; 929 } else { 930 FixedValue = 0; 931 } 932 933 // struct relocation_info (8 bytes) 934 macho::RelocationEntry MRE; 935 MRE.Word0 = Value; 936 MRE.Word1 = ((Index << 0) | 937 (IsPCRel << 24) | 938 (Log2Size << 25) | 939 (1 << 27) | // Extern 940 (macho::RIT_Generic_TLV << 28)); // Type 941 Relocations[Fragment->getParent()].push_back(MRE); 942 } 943 944 static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, 945 unsigned &Log2Size) { 946 RelocType = unsigned(macho::RIT_Vanilla); 947 Log2Size = ~0U; 948 949 switch (Kind) { 950 default: 951 return false; 952 953 case FK_Data_1: 954 Log2Size = llvm::Log2_32(1); 955 return true; 956 case FK_Data_2: 957 Log2Size = llvm::Log2_32(2); 958 return true; 959 case FK_Data_4: 960 Log2Size = llvm::Log2_32(4); 961 return true; 962 case FK_Data_8: 963 Log2Size = llvm::Log2_32(8); 964 return true; 965 966 // Handle 24-bit branch kinds. 967 case ARM::fixup_arm_ldst_pcrel_12: 968 case ARM::fixup_arm_pcrel_10: 969 case ARM::fixup_arm_adr_pcrel_12: 970 case ARM::fixup_arm_condbranch: 971 case ARM::fixup_arm_uncondbranch: 972 RelocType = unsigned(macho::RIT_ARM_Branch24Bit); 973 // Report as 'long', even though that is not quite accurate. 974 Log2Size = llvm::Log2_32(4); 975 return true; 976 977 // Handle Thumb branches. 978 case ARM::fixup_arm_thumb_br: 979 RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); 980 Log2Size = llvm::Log2_32(2); 981 return true; 982 983 case ARM::fixup_arm_thumb_bl: 984 case ARM::fixup_arm_thumb_blx: 985 RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); 986 Log2Size = llvm::Log2_32(4); 987 return true; 988 989 case ARM::fixup_arm_movt_hi16: 990 case ARM::fixup_arm_movt_hi16_pcrel: 991 case ARM::fixup_t2_movt_hi16: 992 case ARM::fixup_t2_movt_hi16_pcrel: 993 RelocType = unsigned(macho::RIT_ARM_HalfDifference); 994 // Report as 'long', even though that is not quite accurate. 995 Log2Size = llvm::Log2_32(4); 996 return true; 997 998 case ARM::fixup_arm_movw_lo16: 999 case ARM::fixup_arm_movw_lo16_pcrel: 1000 case ARM::fixup_t2_movw_lo16: 1001 case ARM::fixup_t2_movw_lo16_pcrel: 1002 RelocType = unsigned(macho::RIT_ARM_Half); 1003 // Report as 'long', even though that is not quite accurate. 1004 Log2Size = llvm::Log2_32(4); 1005 return true; 1006 } 1007 } 1008 void RecordARMRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, 1009 const MCFragment *Fragment, const MCFixup &Fixup, 1010 MCValue Target, uint64_t &FixedValue) { 1011 unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); 1012 unsigned Log2Size; 1013 unsigned RelocType = macho::RIT_Vanilla; 1014 if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) { 1015 report_fatal_error("unknown ARM fixup kind!"); 1016 return; 1017 } 1018 1019 // If this is a difference or a defined symbol plus an offset, then we need 1020 // a scattered relocation entry. Differences always require scattered 1021 // relocations. 1022 if (Target.getSymB()) { 1023 if (RelocType == macho::RIT_ARM_Half || 1024 RelocType == macho::RIT_ARM_HalfDifference) 1025 return RecordARMMovwMovtRelocation(Asm, Layout, Fragment, Fixup, 1026 Target, FixedValue); 1027 return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup, 1028 Target, Log2Size, FixedValue); 1029 } 1030 1031 // Get the symbol data, if any. 1032 MCSymbolData *SD = 0; 1033 if (Target.getSymA()) 1034 SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); 1035 1036 // FIXME: For other platforms, we need to use scattered relocations for 1037 // internal relocations with offsets. If this is an internal relocation 1038 // with an offset, it also needs a scattered relocation entry. 1039 // 1040 // Is this right for ARM? 1041 uint32_t Offset = Target.getConstant(); 1042 if (IsPCRel && RelocType == macho::RIT_Vanilla) 1043 Offset += 1 << Log2Size; 1044 if (Offset && SD && !doesSymbolRequireExternRelocation(SD)) 1045 return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup, Target, 1046 Log2Size, FixedValue); 1047 1048 // See <reloc.h>. 1049 uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); 1050 unsigned Index = 0; 1051 unsigned IsExtern = 0; 1052 unsigned Type = 0; 1053 1054 if (Target.isAbsolute()) { // constant 1055 // FIXME! 1056 report_fatal_error("FIXME: relocations to absolute targets " 1057 "not yet implemented"); 1058 } else { 1059 // Resolve constant variables. 1060 if (SD->getSymbol().isVariable()) { 1061 int64_t Res; 1062 if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( 1063 Res, Layout, SectionAddress)) { 1064 FixedValue = Res; 1065 return; 1066 } 1067 } 1068 1069 // Check whether we need an external or internal relocation. 1070 if (doesSymbolRequireExternRelocation(SD)) { 1071 IsExtern = 1; 1072 Index = SD->getIndex(); 1073 // For external relocations, make sure to offset the fixup value to 1074 // compensate for the addend of the symbol address, if it was 1075 // undefined. This occurs with weak definitions, for example. 1076 if (!SD->Symbol->isUndefined()) 1077 FixedValue -= Layout.getSymbolOffset(SD); 1078 } else { 1079 // The index is the section ordinal (1-based). 1080 const MCSectionData &SymSD = Asm.getSectionData( 1081 SD->getSymbol().getSection()); 1082 Index = SymSD.getOrdinal() + 1; 1083 FixedValue += getSectionAddress(&SymSD); 1084 } 1085 if (IsPCRel) 1086 FixedValue -= getSectionAddress(Fragment->getParent()); 1087 1088 // The type is determined by the fixup kind. 1089 Type = RelocType; 1090 } 1091 1092 // struct relocation_info (8 bytes) 1093 macho::RelocationEntry MRE; 1094 MRE.Word0 = FixupOffset; 1095 MRE.Word1 = ((Index << 0) | 1096 (IsPCRel << 24) | 1097 (Log2Size << 25) | 1098 (IsExtern << 27) | 1099 (Type << 28)); 1100 Relocations[Fragment->getParent()].push_back(MRE); 1101 } 1102 1103 void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, 1104 const MCFragment *Fragment, const MCFixup &Fixup, 1105 MCValue Target, uint64_t &FixedValue) { 1106 // FIXME: These needs to be factored into the target Mach-O writer. 1107 if (isARM()) { 1108 RecordARMRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); 1109 return; 1110 } 1111 if (is64Bit()) { 1112 RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); 1113 return; 1114 } 1115 1116 unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); 1117 unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); 1118 1119 // If this is a 32-bit TLVP reloc it's handled a bit differently. 1120 if (Target.getSymA() && 1121 Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) { 1122 RecordTLVPRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); 1123 return; 1124 } 1125 1126 // If this is a difference or a defined symbol plus an offset, then we need 1127 // a scattered relocation entry. 1128 // Differences always require scattered relocations. 1129 if (Target.getSymB()) 1130 return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup, 1131 Target, Log2Size, FixedValue); 1132 1133 // Get the symbol data, if any. 1134 MCSymbolData *SD = 0; 1135 if (Target.getSymA()) 1136 SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); 1137 1138 // If this is an internal relocation with an offset, it also needs a 1139 // scattered relocation entry. 1140 uint32_t Offset = Target.getConstant(); 1141 if (IsPCRel) 1142 Offset += 1 << Log2Size; 1143 if (Offset && SD && !doesSymbolRequireExternRelocation(SD)) 1144 return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup, 1145 Target, Log2Size, FixedValue); 1146 1147 // See <reloc.h>. 1148 uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); 1149 unsigned Index = 0; 1150 unsigned IsExtern = 0; 1151 unsigned Type = 0; 1152 1153 if (Target.isAbsolute()) { // constant 1154 // SymbolNum of 0 indicates the absolute section. 1155 // 1156 // FIXME: Currently, these are never generated (see code below). I cannot 1157 // find a case where they are actually emitted. 1158 Type = macho::RIT_Vanilla; 1159 } else { 1160 // Resolve constant variables. 1161 if (SD->getSymbol().isVariable()) { 1162 int64_t Res; 1163 if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( 1164 Res, Layout, SectionAddress)) { 1165 FixedValue = Res; 1166 return; 1167 } 1168 } 1169 1170 // Check whether we need an external or internal relocation. 1171 if (doesSymbolRequireExternRelocation(SD)) { 1172 IsExtern = 1; 1173 Index = SD->getIndex(); 1174 // For external relocations, make sure to offset the fixup value to 1175 // compensate for the addend of the symbol address, if it was 1176 // undefined. This occurs with weak definitions, for example. 1177 if (!SD->Symbol->isUndefined()) 1178 FixedValue -= Layout.getSymbolOffset(SD); 1179 } else { 1180 // The index is the section ordinal (1-based). 1181 const MCSectionData &SymSD = Asm.getSectionData( 1182 SD->getSymbol().getSection()); 1183 Index = SymSD.getOrdinal() + 1; 1184 FixedValue += getSectionAddress(&SymSD); 1185 } 1186 if (IsPCRel) 1187 FixedValue -= getSectionAddress(Fragment->getParent()); 1188 1189 Type = macho::RIT_Vanilla; 1190 } 1191 1192 // struct relocation_info (8 bytes) 1193 macho::RelocationEntry MRE; 1194 MRE.Word0 = FixupOffset; 1195 MRE.Word1 = ((Index << 0) | 1196 (IsPCRel << 24) | 1197 (Log2Size << 25) | 1198 (IsExtern << 27) | 1199 (Type << 28)); 1200 Relocations[Fragment->getParent()].push_back(MRE); 1201 } 1202 1203 void BindIndirectSymbols(MCAssembler &Asm) { 1204 // This is the point where 'as' creates actual symbols for indirect symbols 1205 // (in the following two passes). It would be easier for us to do this 1206 // sooner when we see the attribute, but that makes getting the order in the 1207 // symbol table much more complicated than it is worth. 1208 // 1209 // FIXME: Revisit this when the dust settles. 1210 1211 // Bind non lazy symbol pointers first. 1212 unsigned IndirectIndex = 0; 1213 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), 1214 ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { 1215 const MCSectionMachO &Section = 1216 cast<MCSectionMachO>(it->SectionData->getSection()); 1217 1218 if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) 1219 continue; 1220 1221 // Initialize the section indirect symbol base, if necessary. 1222 if (!IndirectSymBase.count(it->SectionData)) 1223 IndirectSymBase[it->SectionData] = IndirectIndex; 1224 1225 Asm.getOrCreateSymbolData(*it->Symbol); 1226 } 1227 1228 // Then lazy symbol pointers and symbol stubs. 1229 IndirectIndex = 0; 1230 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), 1231 ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { 1232 const MCSectionMachO &Section = 1233 cast<MCSectionMachO>(it->SectionData->getSection()); 1234 1235 if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS && 1236 Section.getType() != MCSectionMachO::S_SYMBOL_STUBS) 1237 continue; 1238 1239 // Initialize the section indirect symbol base, if necessary. 1240 if (!IndirectSymBase.count(it->SectionData)) 1241 IndirectSymBase[it->SectionData] = IndirectIndex; 1242 1243 // Set the symbol type to undefined lazy, but only on construction. 1244 // 1245 // FIXME: Do not hardcode. 1246 bool Created; 1247 MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created); 1248 if (Created) 1249 Entry.setFlags(Entry.getFlags() | 0x0001); 1250 } 1251 } 1252 1253 /// ComputeSymbolTable - Compute the symbol table data 1254 /// 1255 /// \param StringTable [out] - The string table data. 1256 /// \param StringIndexMap [out] - Map from symbol names to offsets in the 1257 /// string table. 1258 void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, 1259 std::vector<MachSymbolData> &LocalSymbolData, 1260 std::vector<MachSymbolData> &ExternalSymbolData, 1261 std::vector<MachSymbolData> &UndefinedSymbolData) { 1262 // Build section lookup table. 1263 DenseMap<const MCSection*, uint8_t> SectionIndexMap; 1264 unsigned Index = 1; 1265 for (MCAssembler::iterator it = Asm.begin(), 1266 ie = Asm.end(); it != ie; ++it, ++Index) 1267 SectionIndexMap[&it->getSection()] = Index; 1268 assert(Index <= 256 && "Too many sections!"); 1269 1270 // Index 0 is always the empty string. 1271 StringMap<uint64_t> StringIndexMap; 1272 StringTable += '\x00'; 1273 1274 // Build the symbol arrays and the string table, but only for non-local 1275 // symbols. 1276 // 1277 // The particular order that we collect the symbols and create the string 1278 // table, then sort the symbols is chosen to match 'as'. Even though it 1279 // doesn't matter for correctness, this is important for letting us diff .o 1280 // files. 1281 for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), 1282 ie = Asm.symbol_end(); it != ie; ++it) { 1283 const MCSymbol &Symbol = it->getSymbol(); 1284 1285 // Ignore non-linker visible symbols. 1286 if (!Asm.isSymbolLinkerVisible(it->getSymbol())) 1287 continue; 1288 1289 if (!it->isExternal() && !Symbol.isUndefined()) 1290 continue; 1291 1292 uint64_t &Entry = StringIndexMap[Symbol.getName()]; 1293 if (!Entry) { 1294 Entry = StringTable.size(); 1295 StringTable += Symbol.getName(); 1296 StringTable += '\x00'; 1297 } 1298 1299 MachSymbolData MSD; 1300 MSD.SymbolData = it; 1301 MSD.StringIndex = Entry; 1302 1303 if (Symbol.isUndefined()) { 1304 MSD.SectionIndex = 0; 1305 UndefinedSymbolData.push_back(MSD); 1306 } else if (Symbol.isAbsolute()) { 1307 MSD.SectionIndex = 0; 1308 ExternalSymbolData.push_back(MSD); 1309 } else { 1310 MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); 1311 assert(MSD.SectionIndex && "Invalid section index!"); 1312 ExternalSymbolData.push_back(MSD); 1313 } 1314 } 1315 1316 // Now add the data for local symbols. 1317 for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), 1318 ie = Asm.symbol_end(); it != ie; ++it) { 1319 const MCSymbol &Symbol = it->getSymbol(); 1320 1321 // Ignore non-linker visible symbols. 1322 if (!Asm.isSymbolLinkerVisible(it->getSymbol())) 1323 continue; 1324 1325 if (it->isExternal() || Symbol.isUndefined()) 1326 continue; 1327 1328 uint64_t &Entry = StringIndexMap[Symbol.getName()]; 1329 if (!Entry) { 1330 Entry = StringTable.size(); 1331 StringTable += Symbol.getName(); 1332 StringTable += '\x00'; 1333 } 1334 1335 MachSymbolData MSD; 1336 MSD.SymbolData = it; 1337 MSD.StringIndex = Entry; 1338 1339 if (Symbol.isAbsolute()) { 1340 MSD.SectionIndex = 0; 1341 LocalSymbolData.push_back(MSD); 1342 } else { 1343 MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); 1344 assert(MSD.SectionIndex && "Invalid section index!"); 1345 LocalSymbolData.push_back(MSD); 1346 } 1347 } 1348 1349 // External and undefined symbols are required to be in lexicographic order. 1350 std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); 1351 std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); 1352 1353 // Set the symbol indices. 1354 Index = 0; 1355 for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) 1356 LocalSymbolData[i].SymbolData->setIndex(Index++); 1357 for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) 1358 ExternalSymbolData[i].SymbolData->setIndex(Index++); 1359 for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) 1360 UndefinedSymbolData[i].SymbolData->setIndex(Index++); 1361 1362 // The string table is padded to a multiple of 4. 1363 while (StringTable.size() % 4) 1364 StringTable += '\x00'; 1365 } 1366 1367 void computeSectionAddresses(const MCAssembler &Asm, 1368 const MCAsmLayout &Layout) { 1369 uint64_t StartAddress = 0; 1370 const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder(); 1371 for (int i = 0, n = Order.size(); i != n ; ++i) { 1372 const MCSectionData *SD = Order[i]; 1373 StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment()); 1374 SectionAddress[SD] = StartAddress; 1375 StartAddress += Layout.getSectionAddressSize(SD); 1376 // Explicitly pad the section to match the alignment requirements of the 1377 // following one. This is for 'gas' compatibility, it shouldn't 1378 /// strictly be necessary. 1379 StartAddress += getPaddingSize(SD, Layout); 1380 } 1381 } 1382 1383 void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) { 1384 computeSectionAddresses(Asm, Layout); 1385 1386 // Create symbol data for any indirect symbols. 1387 BindIndirectSymbols(Asm); 1388 1389 // Compute symbol table information and bind symbol indices. 1390 ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData, 1391 UndefinedSymbolData); 1392 } 1393 1394 virtual bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, 1395 const MCSymbolData &DataA, 1396 const MCFragment &FB, 1397 bool InSet, 1398 bool IsPCRel) const { 1399 if (InSet) 1400 return true; 1401 1402 // The effective address is 1403 // addr(atom(A)) + offset(A) 1404 // - addr(atom(B)) - offset(B) 1405 // and the offsets are not relocatable, so the fixup is fully resolved when 1406 // addr(atom(A)) - addr(atom(B)) == 0. 1407 const MCSymbolData *A_Base = 0, *B_Base = 0; 1408 1409 const MCSymbol &SA = DataA.getSymbol().AliasedSymbol(); 1410 const MCSection &SecA = SA.getSection(); 1411 const MCSection &SecB = FB.getParent()->getSection(); 1412 1413 if (IsPCRel) { 1414 // The simple (Darwin, except on x86_64) way of dealing with this was to 1415 // assume that any reference to a temporary symbol *must* be a temporary 1416 // symbol in the same atom, unless the sections differ. Therefore, any 1417 // PCrel relocation to a temporary symbol (in the same section) is fully 1418 // resolved. This also works in conjunction with absolutized .set, which 1419 // requires the compiler to use .set to absolutize the differences between 1420 // symbols which the compiler knows to be assembly time constants, so we 1421 // don't need to worry about considering symbol differences fully 1422 // resolved. 1423 1424 if (!Asm.getBackend().hasReliableSymbolDifference()) { 1425 if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB) 1426 return false; 1427 return true; 1428 } 1429 } else { 1430 if (!TargetObjectWriter->useAggressiveSymbolFolding()) 1431 return false; 1432 } 1433 1434 const MCFragment &FA = *Asm.getSymbolData(SA).getFragment(); 1435 1436 A_Base = FA.getAtom(); 1437 if (!A_Base) 1438 return false; 1439 1440 B_Base = FB.getAtom(); 1441 if (!B_Base) 1442 return false; 1443 1444 // If the atoms are the same, they are guaranteed to have the same address. 1445 if (A_Base == B_Base) 1446 return true; 1447 1448 // Otherwise, we can't prove this is fully resolved. 1449 return false; 1450 } 1451 1452 void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) { 1453 unsigned NumSections = Asm.size(); 1454 1455 // The section data starts after the header, the segment load command (and 1456 // section headers) and the symbol table. 1457 unsigned NumLoadCommands = 1; 1458 uint64_t LoadCommandsSize = is64Bit() ? 1459 macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size : 1460 macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size; 1461 1462 // Add the symbol table load command sizes, if used. 1463 unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() + 1464 UndefinedSymbolData.size(); 1465 if (NumSymbols) { 1466 NumLoadCommands += 2; 1467 LoadCommandsSize += (macho::SymtabLoadCommandSize + 1468 macho::DysymtabLoadCommandSize); 1469 } 1470 1471 // Compute the total size of the section data, as well as its file size and 1472 // vm size. 1473 uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size : 1474 macho::Header32Size) + LoadCommandsSize; 1475 uint64_t SectionDataSize = 0; 1476 uint64_t SectionDataFileSize = 0; 1477 uint64_t VMSize = 0; 1478 for (MCAssembler::const_iterator it = Asm.begin(), 1479 ie = Asm.end(); it != ie; ++it) { 1480 const MCSectionData &SD = *it; 1481 uint64_t Address = getSectionAddress(&SD); 1482 uint64_t Size = Layout.getSectionAddressSize(&SD); 1483 uint64_t FileSize = Layout.getSectionFileSize(&SD); 1484 FileSize += getPaddingSize(&SD, Layout); 1485 1486 VMSize = std::max(VMSize, Address + Size); 1487 1488 if (SD.getSection().isVirtualSection()) 1489 continue; 1490 1491 SectionDataSize = std::max(SectionDataSize, Address + Size); 1492 SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize); 1493 } 1494 1495 // The section data is padded to 4 bytes. 1496 // 1497 // FIXME: Is this machine dependent? 1498 unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4); 1499 SectionDataFileSize += SectionDataPadding; 1500 1501 // Write the prolog, starting with the header and load command... 1502 WriteHeader(NumLoadCommands, LoadCommandsSize, 1503 Asm.getSubsectionsViaSymbols()); 1504 WriteSegmentLoadCommand(NumSections, VMSize, 1505 SectionDataStart, SectionDataSize); 1506 1507 // ... and then the section headers. 1508 uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; 1509 for (MCAssembler::const_iterator it = Asm.begin(), 1510 ie = Asm.end(); it != ie; ++it) { 1511 std::vector<macho::RelocationEntry> &Relocs = Relocations[it]; 1512 unsigned NumRelocs = Relocs.size(); 1513 uint64_t SectionStart = SectionDataStart + getSectionAddress(it); 1514 WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs); 1515 RelocTableEnd += NumRelocs * macho::RelocationInfoSize; 1516 } 1517 1518 // Write the symbol table load command, if used. 1519 if (NumSymbols) { 1520 unsigned FirstLocalSymbol = 0; 1521 unsigned NumLocalSymbols = LocalSymbolData.size(); 1522 unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols; 1523 unsigned NumExternalSymbols = ExternalSymbolData.size(); 1524 unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols; 1525 unsigned NumUndefinedSymbols = UndefinedSymbolData.size(); 1526 unsigned NumIndirectSymbols = Asm.indirect_symbol_size(); 1527 unsigned NumSymTabSymbols = 1528 NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols; 1529 uint64_t IndirectSymbolSize = NumIndirectSymbols * 4; 1530 uint64_t IndirectSymbolOffset = 0; 1531 1532 // If used, the indirect symbols are written after the section data. 1533 if (NumIndirectSymbols) 1534 IndirectSymbolOffset = RelocTableEnd; 1535 1536 // The symbol table is written after the indirect symbol data. 1537 uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize; 1538 1539 // The string table is written after symbol table. 1540 uint64_t StringTableOffset = 1541 SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? macho::Nlist64Size : 1542 macho::Nlist32Size); 1543 WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, 1544 StringTableOffset, StringTable.size()); 1545 1546 WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, 1547 FirstExternalSymbol, NumExternalSymbols, 1548 FirstUndefinedSymbol, NumUndefinedSymbols, 1549 IndirectSymbolOffset, NumIndirectSymbols); 1550 } 1551 1552 // Write the actual section data. 1553 for (MCAssembler::const_iterator it = Asm.begin(), 1554 ie = Asm.end(); it != ie; ++it) { 1555 Asm.WriteSectionData(it, Layout); 1556 1557 uint64_t Pad = getPaddingSize(it, Layout); 1558 for (unsigned int i = 0; i < Pad; ++i) 1559 Write8(0); 1560 } 1561 1562 // Write the extra padding. 1563 WriteZeros(SectionDataPadding); 1564 1565 // Write the relocation entries. 1566 for (MCAssembler::const_iterator it = Asm.begin(), 1567 ie = Asm.end(); it != ie; ++it) { 1568 // Write the section relocation entries, in reverse order to match 'as' 1569 // (approximately, the exact algorithm is more complicated than this). 1570 std::vector<macho::RelocationEntry> &Relocs = Relocations[it]; 1571 for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { 1572 Write32(Relocs[e - i - 1].Word0); 1573 Write32(Relocs[e - i - 1].Word1); 1574 } 1575 } 1576 1577 // Write the symbol table data, if used. 1578 if (NumSymbols) { 1579 // Write the indirect symbol entries. 1580 for (MCAssembler::const_indirect_symbol_iterator 1581 it = Asm.indirect_symbol_begin(), 1582 ie = Asm.indirect_symbol_end(); it != ie; ++it) { 1583 // Indirect symbols in the non lazy symbol pointer section have some 1584 // special handling. 1585 const MCSectionMachO &Section = 1586 static_cast<const MCSectionMachO&>(it->SectionData->getSection()); 1587 if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) { 1588 // If this symbol is defined and internal, mark it as such. 1589 if (it->Symbol->isDefined() && 1590 !Asm.getSymbolData(*it->Symbol).isExternal()) { 1591 uint32_t Flags = macho::ISF_Local; 1592 if (it->Symbol->isAbsolute()) 1593 Flags |= macho::ISF_Absolute; 1594 Write32(Flags); 1595 continue; 1596 } 1597 } 1598 1599 Write32(Asm.getSymbolData(*it->Symbol).getIndex()); 1600 } 1601 1602 // FIXME: Check that offsets match computed ones. 1603 1604 // Write the symbol table entries. 1605 for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) 1606 WriteNlist(LocalSymbolData[i], Layout); 1607 for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) 1608 WriteNlist(ExternalSymbolData[i], Layout); 1609 for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) 1610 WriteNlist(UndefinedSymbolData[i], Layout); 1611 1612 // Write the string table. 1613 OS << StringTable.str(); 1614 } 1615 } 1616}; 1617 1618} 1619 1620MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW, 1621 raw_ostream &OS, 1622 bool IsLittleEndian) { 1623 return new MachObjectWriter(MOTW, OS, IsLittleEndian); 1624} 1625