// InputFiles.cpp revision 341825
1//===- InputFiles.cpp -----------------------------------------------------===// 2// 3// The LLVM Linker 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "InputFiles.h" 11#include "Chunks.h" 12#include "Config.h" 13#include "Driver.h" 14#include "SymbolTable.h" 15#include "Symbols.h" 16#include "lld/Common/ErrorHandler.h" 17#include "lld/Common/Memory.h" 18#include "llvm-c/lto.h" 19#include "llvm/ADT/SmallVector.h" 20#include "llvm/ADT/Triple.h" 21#include "llvm/ADT/Twine.h" 22#include "llvm/BinaryFormat/COFF.h" 23#include "llvm/Object/Binary.h" 24#include "llvm/Object/COFF.h" 25#include "llvm/Support/Casting.h" 26#include "llvm/Support/Endian.h" 27#include "llvm/Support/Error.h" 28#include "llvm/Support/ErrorOr.h" 29#include "llvm/Support/FileSystem.h" 30#include "llvm/Support/Path.h" 31#include "llvm/Target/TargetOptions.h" 32#include <cstring> 33#include <system_error> 34#include <utility> 35 36using namespace llvm; 37using namespace llvm::COFF; 38using namespace llvm::object; 39using namespace llvm::support::endian; 40 41using llvm::Triple; 42using llvm::support::ulittle32_t; 43 44namespace lld { 45namespace coff { 46 47std::vector<ObjFile *> ObjFile::Instances; 48std::vector<ImportFile *> ImportFile::Instances; 49std::vector<BitcodeFile *> BitcodeFile::Instances; 50 51/// Checks that Source is compatible with being a weak alias to Target. 52/// If Source is Undefined and has no weak alias set, makes it a weak 53/// alias to Target. 
54static void checkAndSetWeakAlias(SymbolTable *Symtab, InputFile *F, 55 Symbol *Source, Symbol *Target) { 56 if (auto *U = dyn_cast<Undefined>(Source)) { 57 if (U->WeakAlias && U->WeakAlias != Target) 58 Symtab->reportDuplicate(Source, F); 59 U->WeakAlias = Target; 60 } 61} 62 63ArchiveFile::ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} 64 65void ArchiveFile::parse() { 66 // Parse a MemoryBufferRef as an archive file. 67 File = CHECK(Archive::create(MB), this); 68 69 // Read the symbol table to construct Lazy objects. 70 for (const Archive::Symbol &Sym : File->symbols()) 71 Symtab->addLazy(this, Sym); 72} 73 74// Returns a buffer pointing to a member file containing a given symbol. 75void ArchiveFile::addMember(const Archive::Symbol *Sym) { 76 const Archive::Child &C = 77 CHECK(Sym->getMember(), 78 "could not get the member for symbol " + Sym->getName()); 79 80 // Return an empty buffer if we have already returned the same buffer. 81 if (!Seen.insert(C.getChildOffset()).second) 82 return; 83 84 Driver->enqueueArchiveMember(C, Sym->getName(), getName()); 85} 86 87std::vector<MemoryBufferRef> getArchiveMembers(Archive *File) { 88 std::vector<MemoryBufferRef> V; 89 Error Err = Error::success(); 90 for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) { 91 Archive::Child C = 92 CHECK(COrErr, 93 File->getFileName() + ": could not get the child of the archive"); 94 MemoryBufferRef MBRef = 95 CHECK(C.getMemoryBufferRef(), 96 File->getFileName() + 97 ": could not get the buffer for a child of the archive"); 98 V.push_back(MBRef); 99 } 100 if (Err) 101 fatal(File->getFileName() + 102 ": Archive::children failed: " + toString(std::move(Err))); 103 return V; 104} 105 106void ObjFile::parse() { 107 // Parse a memory buffer as a COFF file. 
108 std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), this); 109 110 if (auto *Obj = dyn_cast<COFFObjectFile>(Bin.get())) { 111 Bin.release(); 112 COFFObj.reset(Obj); 113 } else { 114 fatal(toString(this) + " is not a COFF file"); 115 } 116 117 // Read section and symbol tables. 118 initializeChunks(); 119 initializeSymbols(); 120} 121 122// We set SectionChunk pointers in the SparseChunks vector to this value 123// temporarily to mark comdat sections as having an unknown resolution. As we 124// walk the object file's symbol table, once we visit either a leader symbol or 125// an associative section definition together with the parent comdat's leader, 126// we set the pointer to either nullptr (to mark the section as discarded) or a 127// valid SectionChunk for that section. 128static SectionChunk *const PendingComdat = reinterpret_cast<SectionChunk *>(1); 129 130void ObjFile::initializeChunks() { 131 uint32_t NumSections = COFFObj->getNumberOfSections(); 132 Chunks.reserve(NumSections); 133 SparseChunks.resize(NumSections + 1); 134 for (uint32_t I = 1; I < NumSections + 1; ++I) { 135 const coff_section *Sec; 136 if (auto EC = COFFObj->getSection(I, Sec)) 137 fatal("getSection failed: #" + Twine(I) + ": " + EC.message()); 138 139 if (Sec->Characteristics & IMAGE_SCN_LNK_COMDAT) 140 SparseChunks[I] = PendingComdat; 141 else 142 SparseChunks[I] = readSection(I, nullptr, ""); 143 } 144} 145 146SectionChunk *ObjFile::readSection(uint32_t SectionNumber, 147 const coff_aux_section_definition *Def, 148 StringRef LeaderName) { 149 const coff_section *Sec; 150 StringRef Name; 151 if (auto EC = COFFObj->getSection(SectionNumber, Sec)) 152 fatal("getSection failed: #" + Twine(SectionNumber) + ": " + EC.message()); 153 if (auto EC = COFFObj->getSectionName(Sec, Name)) 154 fatal("getSectionName failed: #" + Twine(SectionNumber) + ": " + 155 EC.message()); 156 157 if (Name == ".drectve") { 158 ArrayRef<uint8_t> Data; 159 COFFObj->getSectionContents(Sec, Data); 160 Directives 
= std::string((const char *)Data.data(), Data.size()); 161 return nullptr; 162 } 163 164 // Object files may have DWARF debug info or MS CodeView debug info 165 // (or both). 166 // 167 // DWARF sections don't need any special handling from the perspective 168 // of the linker; they are just a data section containing relocations. 169 // We can just link them to complete debug info. 170 // 171 // CodeView needs a linker support. We need to interpret and debug 172 // info, and then write it to a separate .pdb file. 173 174 // Ignore DWARF debug info unless /debug is given. 175 if (!Config->Debug && Name.startswith(".debug_")) 176 return nullptr; 177 178 if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE) 179 return nullptr; 180 auto *C = make<SectionChunk>(this, Sec); 181 if (Def) 182 C->Checksum = Def->CheckSum; 183 184 // CodeView sections are stored to a different vector because they are not 185 // linked in the regular manner. 186 if (C->isCodeView()) 187 DebugChunks.push_back(C); 188 else if (Config->GuardCF != GuardCFLevel::Off && Name == ".gfids$y") 189 GuardFidChunks.push_back(C); 190 else if (Config->GuardCF != GuardCFLevel::Off && Name == ".gljmp$y") 191 GuardLJmpChunks.push_back(C); 192 else if (Name == ".sxdata") 193 SXDataChunks.push_back(C); 194 else if (Config->TailMerge && Sec->NumberOfRelocations == 0 && 195 Name == ".rdata" && LeaderName.startswith("??_C@")) 196 // COFF sections that look like string literal sections (i.e. no 197 // relocations, in .rdata, leader symbol name matches the MSVC name mangling 198 // for string literals) are subject to string tail merging. 
199 MergeChunk::addSection(C); 200 else 201 Chunks.push_back(C); 202 203 return C; 204} 205 206void ObjFile::readAssociativeDefinition( 207 COFFSymbolRef Sym, const coff_aux_section_definition *Def) { 208 readAssociativeDefinition(Sym, Def, Def->getNumber(Sym.isBigObj())); 209} 210 211void ObjFile::readAssociativeDefinition(COFFSymbolRef Sym, 212 const coff_aux_section_definition *Def, 213 uint32_t ParentSection) { 214 SectionChunk *Parent = SparseChunks[ParentSection]; 215 216 // If the parent is pending, it probably means that its section definition 217 // appears after us in the symbol table. Leave the associated section as 218 // pending; we will handle it during the second pass in initializeSymbols(). 219 if (Parent == PendingComdat) 220 return; 221 222 // Check whether the parent is prevailing. If it is, so are we, and we read 223 // the section; otherwise mark it as discarded. 224 int32_t SectionNumber = Sym.getSectionNumber(); 225 if (Parent) { 226 SparseChunks[SectionNumber] = readSection(SectionNumber, Def, ""); 227 if (SparseChunks[SectionNumber]) 228 Parent->addAssociative(SparseChunks[SectionNumber]); 229 } else { 230 SparseChunks[SectionNumber] = nullptr; 231 } 232} 233 234void ObjFile::recordPrevailingSymbolForMingw( 235 COFFSymbolRef Sym, DenseMap<StringRef, uint32_t> &PrevailingSectionMap) { 236 // For comdat symbols in executable sections, where this is the copy 237 // of the section chunk we actually include instead of discarding it, 238 // add the symbol to a map to allow using it for implicitly 239 // associating .[px]data$<func> sections to it. 
240 int32_t SectionNumber = Sym.getSectionNumber(); 241 SectionChunk *SC = SparseChunks[SectionNumber]; 242 if (SC && SC->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) { 243 StringRef Name; 244 COFFObj->getSymbolName(Sym, Name); 245 PrevailingSectionMap[Name] = SectionNumber; 246 } 247} 248 249void ObjFile::maybeAssociateSEHForMingw( 250 COFFSymbolRef Sym, const coff_aux_section_definition *Def, 251 const DenseMap<StringRef, uint32_t> &PrevailingSectionMap) { 252 StringRef Name; 253 COFFObj->getSymbolName(Sym, Name); 254 if (Name.consume_front(".pdata$") || Name.consume_front(".xdata$")) { 255 // For MinGW, treat .[px]data$<func> as implicitly associative to 256 // the symbol <func>. 257 auto ParentSym = PrevailingSectionMap.find(Name); 258 if (ParentSym != PrevailingSectionMap.end()) 259 readAssociativeDefinition(Sym, Def, ParentSym->second); 260 } 261} 262 263Symbol *ObjFile::createRegular(COFFSymbolRef Sym) { 264 SectionChunk *SC = SparseChunks[Sym.getSectionNumber()]; 265 if (Sym.isExternal()) { 266 StringRef Name; 267 COFFObj->getSymbolName(Sym, Name); 268 if (SC) 269 return Symtab->addRegular(this, Name, Sym.getGeneric(), SC); 270 return Symtab->addUndefined(Name, this, false); 271 } 272 if (SC) 273 return make<DefinedRegular>(this, /*Name*/ "", false, 274 /*IsExternal*/ false, Sym.getGeneric(), SC); 275 return nullptr; 276} 277 278void ObjFile::initializeSymbols() { 279 uint32_t NumSymbols = COFFObj->getNumberOfSymbols(); 280 Symbols.resize(NumSymbols); 281 282 SmallVector<std::pair<Symbol *, uint32_t>, 8> WeakAliases; 283 std::vector<uint32_t> PendingIndexes; 284 PendingIndexes.reserve(NumSymbols); 285 286 DenseMap<StringRef, uint32_t> PrevailingSectionMap; 287 std::vector<const coff_aux_section_definition *> ComdatDefs( 288 COFFObj->getNumberOfSections() + 1); 289 290 for (uint32_t I = 0; I < NumSymbols; ++I) { 291 COFFSymbolRef COFFSym = check(COFFObj->getSymbol(I)); 292 bool PrevailingComdat; 293 if (COFFSym.isUndefined()) { 294 Symbols[I] = 
createUndefined(COFFSym); 295 } else if (COFFSym.isWeakExternal()) { 296 Symbols[I] = createUndefined(COFFSym); 297 uint32_t TagIndex = COFFSym.getAux<coff_aux_weak_external>()->TagIndex; 298 WeakAliases.emplace_back(Symbols[I], TagIndex); 299 } else if (Optional<Symbol *> OptSym = 300 createDefined(COFFSym, ComdatDefs, PrevailingComdat)) { 301 Symbols[I] = *OptSym; 302 if (Config->MinGW && PrevailingComdat) 303 recordPrevailingSymbolForMingw(COFFSym, PrevailingSectionMap); 304 } else { 305 // createDefined() returns None if a symbol belongs to a section that 306 // was pending at the point when the symbol was read. This can happen in 307 // two cases: 308 // 1) section definition symbol for a comdat leader; 309 // 2) symbol belongs to a comdat section associated with a section whose 310 // section definition symbol appears later in the symbol table. 311 // In both of these cases, we can expect the section to be resolved by 312 // the time we finish visiting the remaining symbols in the symbol 313 // table. So we postpone the handling of this symbol until that time. 
314 PendingIndexes.push_back(I); 315 } 316 I += COFFSym.getNumberOfAuxSymbols(); 317 } 318 319 for (uint32_t I : PendingIndexes) { 320 COFFSymbolRef Sym = check(COFFObj->getSymbol(I)); 321 if (auto *Def = Sym.getSectionDefinition()) { 322 if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) 323 readAssociativeDefinition(Sym, Def); 324 else if (Config->MinGW) 325 maybeAssociateSEHForMingw(Sym, Def, PrevailingSectionMap); 326 } 327 if (SparseChunks[Sym.getSectionNumber()] == PendingComdat) { 328 StringRef Name; 329 COFFObj->getSymbolName(Sym, Name); 330 log("comdat section " + Name + 331 " without leader and unassociated, discarding"); 332 continue; 333 } 334 Symbols[I] = createRegular(Sym); 335 } 336 337 for (auto &KV : WeakAliases) { 338 Symbol *Sym = KV.first; 339 uint32_t Idx = KV.second; 340 checkAndSetWeakAlias(Symtab, this, Sym, Symbols[Idx]); 341 } 342} 343 344Symbol *ObjFile::createUndefined(COFFSymbolRef Sym) { 345 StringRef Name; 346 COFFObj->getSymbolName(Sym, Name); 347 return Symtab->addUndefined(Name, this, Sym.isWeakExternal()); 348} 349 350Optional<Symbol *> ObjFile::createDefined( 351 COFFSymbolRef Sym, 352 std::vector<const coff_aux_section_definition *> &ComdatDefs, 353 bool &Prevailing) { 354 Prevailing = false; 355 auto GetName = [&]() { 356 StringRef S; 357 COFFObj->getSymbolName(Sym, S); 358 return S; 359 }; 360 361 if (Sym.isCommon()) { 362 auto *C = make<CommonChunk>(Sym); 363 Chunks.push_back(C); 364 return Symtab->addCommon(this, GetName(), Sym.getValue(), Sym.getGeneric(), 365 C); 366 } 367 368 if (Sym.isAbsolute()) { 369 StringRef Name = GetName(); 370 371 // Skip special symbols. 
372 if (Name == "@comp.id") 373 return nullptr; 374 if (Name == "@feat.00") { 375 Feat00Flags = Sym.getValue(); 376 return nullptr; 377 } 378 379 if (Sym.isExternal()) 380 return Symtab->addAbsolute(Name, Sym); 381 return make<DefinedAbsolute>(Name, Sym); 382 } 383 384 int32_t SectionNumber = Sym.getSectionNumber(); 385 if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG) 386 return nullptr; 387 388 if (llvm::COFF::isReservedSectionNumber(SectionNumber)) 389 fatal(toString(this) + ": " + GetName() + 390 " should not refer to special section " + Twine(SectionNumber)); 391 392 if ((uint32_t)SectionNumber >= SparseChunks.size()) 393 fatal(toString(this) + ": " + GetName() + 394 " should not refer to non-existent section " + Twine(SectionNumber)); 395 396 // Handle comdat leader symbols. 397 if (const coff_aux_section_definition *Def = ComdatDefs[SectionNumber]) { 398 ComdatDefs[SectionNumber] = nullptr; 399 Symbol *Leader; 400 if (Sym.isExternal()) { 401 std::tie(Leader, Prevailing) = 402 Symtab->addComdat(this, GetName(), Sym.getGeneric()); 403 } else { 404 Leader = make<DefinedRegular>(this, /*Name*/ "", false, 405 /*IsExternal*/ false, Sym.getGeneric()); 406 Prevailing = true; 407 } 408 409 if (Prevailing) { 410 SectionChunk *C = readSection(SectionNumber, Def, GetName()); 411 SparseChunks[SectionNumber] = C; 412 C->Sym = cast<DefinedRegular>(Leader); 413 cast<DefinedRegular>(Leader)->Data = &C->Repl; 414 } else { 415 SparseChunks[SectionNumber] = nullptr; 416 } 417 return Leader; 418 } 419 420 // Read associative section definitions and prepare to handle the comdat 421 // leader symbol by setting the section's ComdatDefs pointer if we encounter a 422 // non-associative comdat. 
423 if (SparseChunks[SectionNumber] == PendingComdat) { 424 if (auto *Def = Sym.getSectionDefinition()) { 425 if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) 426 readAssociativeDefinition(Sym, Def); 427 else 428 ComdatDefs[SectionNumber] = Def; 429 } 430 } 431 432 if (SparseChunks[SectionNumber] == PendingComdat) 433 return None; 434 return createRegular(Sym); 435} 436 437MachineTypes ObjFile::getMachineType() { 438 if (COFFObj) 439 return static_cast<MachineTypes>(COFFObj->getMachine()); 440 return IMAGE_FILE_MACHINE_UNKNOWN; 441} 442 443StringRef ltrim1(StringRef S, const char *Chars) { 444 if (!S.empty() && strchr(Chars, S[0])) 445 return S.substr(1); 446 return S; 447} 448 449void ImportFile::parse() { 450 const char *Buf = MB.getBufferStart(); 451 const char *End = MB.getBufferEnd(); 452 const auto *Hdr = reinterpret_cast<const coff_import_header *>(Buf); 453 454 // Check if the total size is valid. 455 if ((size_t)(End - Buf) != (sizeof(*Hdr) + Hdr->SizeOfData)) 456 fatal("broken import library"); 457 458 // Read names and create an __imp_ symbol. 
459 StringRef Name = Saver.save(StringRef(Buf + sizeof(*Hdr))); 460 StringRef ImpName = Saver.save("__imp_" + Name); 461 const char *NameStart = Buf + sizeof(coff_import_header) + Name.size() + 1; 462 DLLName = StringRef(NameStart); 463 StringRef ExtName; 464 switch (Hdr->getNameType()) { 465 case IMPORT_ORDINAL: 466 ExtName = ""; 467 break; 468 case IMPORT_NAME: 469 ExtName = Name; 470 break; 471 case IMPORT_NAME_NOPREFIX: 472 ExtName = ltrim1(Name, "?@_"); 473 break; 474 case IMPORT_NAME_UNDECORATE: 475 ExtName = ltrim1(Name, "?@_"); 476 ExtName = ExtName.substr(0, ExtName.find('@')); 477 break; 478 } 479 480 this->Hdr = Hdr; 481 ExternalName = ExtName; 482 483 ImpSym = Symtab->addImportData(ImpName, this); 484 485 if (Hdr->getType() == llvm::COFF::IMPORT_CONST) 486 static_cast<void>(Symtab->addImportData(Name, this)); 487 488 // If type is function, we need to create a thunk which jump to an 489 // address pointed by the __imp_ symbol. (This allows you to call 490 // DLL functions just like regular non-DLL functions.) 
491 if (Hdr->getType() == llvm::COFF::IMPORT_CODE) 492 ThunkSym = Symtab->addImportThunk( 493 Name, cast_or_null<DefinedImportData>(ImpSym), Hdr->Machine); 494} 495 496void BitcodeFile::parse() { 497 Obj = check(lto::InputFile::create(MemoryBufferRef( 498 MB.getBuffer(), Saver.save(ParentName + MB.getBufferIdentifier())))); 499 std::vector<std::pair<Symbol *, bool>> Comdat(Obj->getComdatTable().size()); 500 for (size_t I = 0; I != Obj->getComdatTable().size(); ++I) 501 Comdat[I] = Symtab->addComdat(this, Saver.save(Obj->getComdatTable()[I])); 502 for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) { 503 StringRef SymName = Saver.save(ObjSym.getName()); 504 int ComdatIndex = ObjSym.getComdatIndex(); 505 Symbol *Sym; 506 if (ObjSym.isUndefined()) { 507 Sym = Symtab->addUndefined(SymName, this, false); 508 } else if (ObjSym.isCommon()) { 509 Sym = Symtab->addCommon(this, SymName, ObjSym.getCommonSize()); 510 } else if (ObjSym.isWeak() && ObjSym.isIndirect()) { 511 // Weak external. 
512 Sym = Symtab->addUndefined(SymName, this, true); 513 std::string Fallback = ObjSym.getCOFFWeakExternalFallback(); 514 Symbol *Alias = Symtab->addUndefined(Saver.save(Fallback)); 515 checkAndSetWeakAlias(Symtab, this, Sym, Alias); 516 } else if (ComdatIndex != -1) { 517 if (SymName == Obj->getComdatTable()[ComdatIndex]) 518 Sym = Comdat[ComdatIndex].first; 519 else if (Comdat[ComdatIndex].second) 520 Sym = Symtab->addRegular(this, SymName); 521 else 522 Sym = Symtab->addUndefined(SymName, this, false); 523 } else { 524 Sym = Symtab->addRegular(this, SymName); 525 } 526 Symbols.push_back(Sym); 527 } 528 Directives = Obj->getCOFFLinkerOpts(); 529} 530 531MachineTypes BitcodeFile::getMachineType() { 532 switch (Triple(Obj->getTargetTriple()).getArch()) { 533 case Triple::x86_64: 534 return AMD64; 535 case Triple::x86: 536 return I386; 537 case Triple::arm: 538 return ARMNT; 539 case Triple::aarch64: 540 return ARM64; 541 default: 542 return IMAGE_FILE_MACHINE_UNKNOWN; 543 } 544} 545} // namespace coff 546} // namespace lld 547 548// Returns the last element of a path, which is supposed to be a filename. 549static StringRef getBasename(StringRef Path) { 550 return sys::path::filename(Path, sys::path::Style::windows); 551} 552 553// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)". 554std::string lld::toString(const coff::InputFile *File) { 555 if (!File) 556 return "<internal>"; 557 if (File->ParentName.empty()) 558 return File->getName(); 559 560 return (getBasename(File->ParentName) + "(" + getBasename(File->getName()) + 561 ")") 562 .str(); 563} 564