// InputFiles.cpp revision 327952
//===- InputFiles.cpp -----------------------------------------------------===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "InputFiles.h"
#include "Chunks.h"
#include "Config.h"
#include "Driver.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm-c/lto.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Target/TargetOptions.h"
#include <cstring>
#include <string>
#include <system_error>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;
using namespace llvm::COFF;
using namespace llvm::object;
using namespace llvm::support::endian;

using llvm::Triple;
using llvm::support::ulittle32_t;

namespace lld {
namespace coff {

std::vector<ObjFile *> ObjFile::Instances;
std::vector<ImportFile *> ImportFile::Instances;
std::vector<BitcodeFile *> BitcodeFile::Instances;

/// Checks that Source is compatible with being a weak alias to Target.
/// If Source is Undefined and has no weak alias set, makes it a weak
/// alias to Target.
53static void checkAndSetWeakAlias(SymbolTable *Symtab, InputFile *F, 54 Symbol *Source, Symbol *Target) { 55 if (auto *U = dyn_cast<Undefined>(Source)) { 56 if (U->WeakAlias && U->WeakAlias != Target) 57 Symtab->reportDuplicate(Source, F); 58 U->WeakAlias = Target; 59 } 60} 61 62ArchiveFile::ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} 63 64void ArchiveFile::parse() { 65 // Parse a MemoryBufferRef as an archive file. 66 File = CHECK(Archive::create(MB), this); 67 68 // Read the symbol table to construct Lazy objects. 69 for (const Archive::Symbol &Sym : File->symbols()) 70 Symtab->addLazy(this, Sym); 71} 72 73// Returns a buffer pointing to a member file containing a given symbol. 74void ArchiveFile::addMember(const Archive::Symbol *Sym) { 75 const Archive::Child &C = 76 CHECK(Sym->getMember(), 77 "could not get the member for symbol " + Sym->getName()); 78 79 // Return an empty buffer if we have already returned the same buffer. 80 if (!Seen.insert(C.getChildOffset()).second) 81 return; 82 83 Driver->enqueueArchiveMember(C, Sym->getName(), getName()); 84} 85 86std::vector<MemoryBufferRef> getArchiveMembers(Archive *File) { 87 std::vector<MemoryBufferRef> V; 88 Error Err = Error::success(); 89 for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) { 90 Archive::Child C = 91 CHECK(COrErr, 92 File->getFileName() + ": could not get the child of the archive"); 93 MemoryBufferRef MBRef = 94 CHECK(C.getMemoryBufferRef(), 95 File->getFileName() + 96 ": could not get the buffer for a child of the archive"); 97 V.push_back(MBRef); 98 } 99 if (Err) 100 fatal(File->getFileName() + 101 ": Archive::children failed: " + toString(std::move(Err))); 102 return V; 103} 104 105void ObjFile::parse() { 106 // Parse a memory buffer as a COFF file. 
107 std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), this); 108 109 if (auto *Obj = dyn_cast<COFFObjectFile>(Bin.get())) { 110 Bin.release(); 111 COFFObj.reset(Obj); 112 } else { 113 fatal(toString(this) + " is not a COFF file"); 114 } 115 116 // Read section and symbol tables. 117 initializeChunks(); 118 initializeSymbols(); 119} 120 121// We set SectionChunk pointers in the SparseChunks vector to this value 122// temporarily to mark comdat sections as having an unknown resolution. As we 123// walk the object file's symbol table, once we visit either a leader symbol or 124// an associative section definition together with the parent comdat's leader, 125// we set the pointer to either nullptr (to mark the section as discarded) or a 126// valid SectionChunk for that section. 127static SectionChunk *const PendingComdat = reinterpret_cast<SectionChunk *>(1); 128 129void ObjFile::initializeChunks() { 130 uint32_t NumSections = COFFObj->getNumberOfSections(); 131 Chunks.reserve(NumSections); 132 SparseChunks.resize(NumSections + 1); 133 for (uint32_t I = 1; I < NumSections + 1; ++I) { 134 const coff_section *Sec; 135 if (auto EC = COFFObj->getSection(I, Sec)) 136 fatal("getSection failed: #" + Twine(I) + ": " + EC.message()); 137 138 if (Sec->Characteristics & IMAGE_SCN_LNK_COMDAT) 139 SparseChunks[I] = PendingComdat; 140 else 141 SparseChunks[I] = readSection(I, nullptr); 142 } 143} 144 145SectionChunk *ObjFile::readSection(uint32_t SectionNumber, 146 const coff_aux_section_definition *Def) { 147 const coff_section *Sec; 148 StringRef Name; 149 if (auto EC = COFFObj->getSection(SectionNumber, Sec)) 150 fatal("getSection failed: #" + Twine(SectionNumber) + ": " + EC.message()); 151 if (auto EC = COFFObj->getSectionName(Sec, Name)) 152 fatal("getSectionName failed: #" + Twine(SectionNumber) + ": " + 153 EC.message()); 154 if (Name == ".sxdata") { 155 ArrayRef<uint8_t> Data; 156 COFFObj->getSectionContents(Sec, Data); 157 if (Data.size() % 4 != 0) 158 fatal(".sxdata 
must be an array of symbol table indices"); 159 SXData = {reinterpret_cast<const ulittle32_t *>(Data.data()), 160 Data.size() / 4}; 161 return nullptr; 162 } 163 if (Name == ".drectve") { 164 ArrayRef<uint8_t> Data; 165 COFFObj->getSectionContents(Sec, Data); 166 Directives = std::string((const char *)Data.data(), Data.size()); 167 return nullptr; 168 } 169 170 // Object files may have DWARF debug info or MS CodeView debug info 171 // (or both). 172 // 173 // DWARF sections don't need any special handling from the perspective 174 // of the linker; they are just a data section containing relocations. 175 // We can just link them to complete debug info. 176 // 177 // CodeView needs a linker support. We need to interpret and debug 178 // info, and then write it to a separate .pdb file. 179 180 // Ignore debug info unless /debug is given. 181 if (!Config->Debug && Name.startswith(".debug")) 182 return nullptr; 183 184 if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE) 185 return nullptr; 186 auto *C = make<SectionChunk>(this, Sec); 187 if (Def) 188 C->Checksum = Def->CheckSum; 189 190 // CodeView sections are stored to a different vector because they are not 191 // linked in the regular manner. 192 if (C->isCodeView()) 193 DebugChunks.push_back(C); 194 else 195 Chunks.push_back(C); 196 197 return C; 198} 199 200void ObjFile::readAssociativeDefinition( 201 COFFSymbolRef Sym, const coff_aux_section_definition *Def) { 202 SectionChunk *Parent = SparseChunks[Def->getNumber(Sym.isBigObj())]; 203 204 // If the parent is pending, it probably means that its section definition 205 // appears after us in the symbol table. Leave the associated section as 206 // pending; we will handle it during the second pass in initializeSymbols(). 207 if (Parent == PendingComdat) 208 return; 209 210 // Check whether the parent is prevailing. If it is, so are we, and we read 211 // the section; otherwise mark it as discarded. 
212 int32_t SectionNumber = Sym.getSectionNumber(); 213 if (Parent) { 214 SparseChunks[SectionNumber] = readSection(SectionNumber, Def); 215 if (SparseChunks[SectionNumber]) 216 Parent->addAssociative(SparseChunks[SectionNumber]); 217 } else { 218 SparseChunks[SectionNumber] = nullptr; 219 } 220} 221 222Symbol *ObjFile::createRegular(COFFSymbolRef Sym) { 223 SectionChunk *SC = SparseChunks[Sym.getSectionNumber()]; 224 if (Sym.isExternal()) { 225 StringRef Name; 226 COFFObj->getSymbolName(Sym, Name); 227 if (SC) 228 return Symtab->addRegular(this, Name, Sym.getGeneric(), SC); 229 return Symtab->addUndefined(Name, this, false); 230 } 231 if (SC) 232 return make<DefinedRegular>(this, /*Name*/ "", false, 233 /*IsExternal*/ false, Sym.getGeneric(), SC); 234 return nullptr; 235} 236 237void ObjFile::initializeSymbols() { 238 uint32_t NumSymbols = COFFObj->getNumberOfSymbols(); 239 Symbols.resize(NumSymbols); 240 241 SmallVector<std::pair<Symbol *, uint32_t>, 8> WeakAliases; 242 std::vector<uint32_t> PendingIndexes; 243 PendingIndexes.reserve(NumSymbols); 244 245 std::vector<const coff_aux_section_definition *> ComdatDefs( 246 COFFObj->getNumberOfSections() + 1); 247 248 for (uint32_t I = 0; I < NumSymbols; ++I) { 249 COFFSymbolRef COFFSym = check(COFFObj->getSymbol(I)); 250 if (COFFSym.isUndefined()) { 251 Symbols[I] = createUndefined(COFFSym); 252 } else if (COFFSym.isWeakExternal()) { 253 Symbols[I] = createUndefined(COFFSym); 254 uint32_t TagIndex = COFFSym.getAux<coff_aux_weak_external>()->TagIndex; 255 WeakAliases.emplace_back(Symbols[I], TagIndex); 256 } else if (Optional<Symbol *> OptSym = createDefined(COFFSym, ComdatDefs)) { 257 Symbols[I] = *OptSym; 258 } else { 259 // createDefined() returns None if a symbol belongs to a section that 260 // was pending at the point when the symbol was read. 
This can happen in 261 // two cases: 262 // 1) section definition symbol for a comdat leader; 263 // 2) symbol belongs to a comdat section associated with a section whose 264 // section definition symbol appears later in the symbol table. 265 // In both of these cases, we can expect the section to be resolved by 266 // the time we finish visiting the remaining symbols in the symbol 267 // table. So we postpone the handling of this symbol until that time. 268 PendingIndexes.push_back(I); 269 } 270 I += COFFSym.getNumberOfAuxSymbols(); 271 } 272 273 for (uint32_t I : PendingIndexes) { 274 COFFSymbolRef Sym = check(COFFObj->getSymbol(I)); 275 if (auto *Def = Sym.getSectionDefinition()) 276 if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) 277 readAssociativeDefinition(Sym, Def); 278 Symbols[I] = createRegular(Sym); 279 } 280 281 for (auto &KV : WeakAliases) { 282 Symbol *Sym = KV.first; 283 uint32_t Idx = KV.second; 284 checkAndSetWeakAlias(Symtab, this, Sym, Symbols[Idx]); 285 } 286} 287 288Symbol *ObjFile::createUndefined(COFFSymbolRef Sym) { 289 StringRef Name; 290 COFFObj->getSymbolName(Sym, Name); 291 return Symtab->addUndefined(Name, this, Sym.isWeakExternal()); 292} 293 294Optional<Symbol *> ObjFile::createDefined( 295 COFFSymbolRef Sym, 296 std::vector<const coff_aux_section_definition *> &ComdatDefs) { 297 StringRef Name; 298 if (Sym.isCommon()) { 299 auto *C = make<CommonChunk>(Sym); 300 Chunks.push_back(C); 301 COFFObj->getSymbolName(Sym, Name); 302 Symbol *S = 303 Symtab->addCommon(this, Name, Sym.getValue(), Sym.getGeneric(), C); 304 return S; 305 } 306 if (Sym.isAbsolute()) { 307 COFFObj->getSymbolName(Sym, Name); 308 // Skip special symbols. 309 if (Name == "@comp.id") 310 return nullptr; 311 // COFF spec 5.10.1. The .sxdata section. 
312 if (Name == "@feat.00") { 313 if (Sym.getValue() & 1) 314 SEHCompat = true; 315 return nullptr; 316 } 317 if (Sym.isExternal()) 318 return Symtab->addAbsolute(Name, Sym); 319 else 320 return make<DefinedAbsolute>(Name, Sym); 321 } 322 int32_t SectionNumber = Sym.getSectionNumber(); 323 if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG) 324 return nullptr; 325 326 // Reserved sections numbers don't have contents. 327 if (llvm::COFF::isReservedSectionNumber(SectionNumber)) 328 fatal("broken object file: " + toString(this)); 329 330 // This symbol references a section which is not present in the section 331 // header. 332 if ((uint32_t)SectionNumber >= SparseChunks.size()) 333 fatal("broken object file: " + toString(this)); 334 335 // Handle comdat leader symbols. 336 if (const coff_aux_section_definition *Def = ComdatDefs[SectionNumber]) { 337 ComdatDefs[SectionNumber] = nullptr; 338 Symbol *Leader; 339 bool Prevailing; 340 if (Sym.isExternal()) { 341 COFFObj->getSymbolName(Sym, Name); 342 std::tie(Leader, Prevailing) = 343 Symtab->addComdat(this, Name, Sym.getGeneric()); 344 } else { 345 Leader = make<DefinedRegular>(this, /*Name*/ "", false, 346 /*IsExternal*/ false, Sym.getGeneric()); 347 Prevailing = true; 348 } 349 if (Prevailing) { 350 SectionChunk *C = readSection(SectionNumber, Def); 351 SparseChunks[SectionNumber] = C; 352 C->Sym = cast<DefinedRegular>(Leader); 353 cast<DefinedRegular>(Leader)->Data = &C->Repl; 354 } else { 355 SparseChunks[SectionNumber] = nullptr; 356 } 357 return Leader; 358 } 359 360 // Read associative section definitions and prepare to handle the comdat 361 // leader symbol by setting the section's ComdatDefs pointer if we encounter a 362 // non-associative comdat. 
363 if (SparseChunks[SectionNumber] == PendingComdat) { 364 if (auto *Def = Sym.getSectionDefinition()) { 365 if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) 366 readAssociativeDefinition(Sym, Def); 367 else 368 ComdatDefs[SectionNumber] = Def; 369 } 370 } 371 372 if (SparseChunks[SectionNumber] == PendingComdat) 373 return None; 374 return createRegular(Sym); 375} 376 377MachineTypes ObjFile::getMachineType() { 378 if (COFFObj) 379 return static_cast<MachineTypes>(COFFObj->getMachine()); 380 return IMAGE_FILE_MACHINE_UNKNOWN; 381} 382 383StringRef ltrim1(StringRef S, const char *Chars) { 384 if (!S.empty() && strchr(Chars, S[0])) 385 return S.substr(1); 386 return S; 387} 388 389void ImportFile::parse() { 390 const char *Buf = MB.getBufferStart(); 391 const char *End = MB.getBufferEnd(); 392 const auto *Hdr = reinterpret_cast<const coff_import_header *>(Buf); 393 394 // Check if the total size is valid. 395 if ((size_t)(End - Buf) != (sizeof(*Hdr) + Hdr->SizeOfData)) 396 fatal("broken import library"); 397 398 // Read names and create an __imp_ symbol. 
399 StringRef Name = Saver.save(StringRef(Buf + sizeof(*Hdr))); 400 StringRef ImpName = Saver.save("__imp_" + Name); 401 const char *NameStart = Buf + sizeof(coff_import_header) + Name.size() + 1; 402 DLLName = StringRef(NameStart); 403 StringRef ExtName; 404 switch (Hdr->getNameType()) { 405 case IMPORT_ORDINAL: 406 ExtName = ""; 407 break; 408 case IMPORT_NAME: 409 ExtName = Name; 410 break; 411 case IMPORT_NAME_NOPREFIX: 412 ExtName = ltrim1(Name, "?@_"); 413 break; 414 case IMPORT_NAME_UNDECORATE: 415 ExtName = ltrim1(Name, "?@_"); 416 ExtName = ExtName.substr(0, ExtName.find('@')); 417 break; 418 } 419 420 this->Hdr = Hdr; 421 ExternalName = ExtName; 422 423 ImpSym = Symtab->addImportData(ImpName, this); 424 425 if (Hdr->getType() == llvm::COFF::IMPORT_CONST) 426 static_cast<void>(Symtab->addImportData(Name, this)); 427 428 // If type is function, we need to create a thunk which jump to an 429 // address pointed by the __imp_ symbol. (This allows you to call 430 // DLL functions just like regular non-DLL functions.) 
431 if (Hdr->getType() == llvm::COFF::IMPORT_CODE) 432 ThunkSym = Symtab->addImportThunk(Name, ImpSym, Hdr->Machine); 433} 434 435void BitcodeFile::parse() { 436 Obj = check(lto::InputFile::create(MemoryBufferRef( 437 MB.getBuffer(), Saver.save(ParentName + MB.getBufferIdentifier())))); 438 std::vector<std::pair<Symbol *, bool>> Comdat(Obj->getComdatTable().size()); 439 for (size_t I = 0; I != Obj->getComdatTable().size(); ++I) 440 Comdat[I] = Symtab->addComdat(this, Saver.save(Obj->getComdatTable()[I])); 441 for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) { 442 StringRef SymName = Saver.save(ObjSym.getName()); 443 int ComdatIndex = ObjSym.getComdatIndex(); 444 Symbol *Sym; 445 if (ObjSym.isUndefined()) { 446 Sym = Symtab->addUndefined(SymName, this, false); 447 } else if (ObjSym.isCommon()) { 448 Sym = Symtab->addCommon(this, SymName, ObjSym.getCommonSize()); 449 } else if (ObjSym.isWeak() && ObjSym.isIndirect()) { 450 // Weak external. 451 Sym = Symtab->addUndefined(SymName, this, true); 452 std::string Fallback = ObjSym.getCOFFWeakExternalFallback(); 453 Symbol *Alias = Symtab->addUndefined(Saver.save(Fallback)); 454 checkAndSetWeakAlias(Symtab, this, Sym, Alias); 455 } else if (ComdatIndex != -1) { 456 if (SymName == Obj->getComdatTable()[ComdatIndex]) 457 Sym = Comdat[ComdatIndex].first; 458 else if (Comdat[ComdatIndex].second) 459 Sym = Symtab->addRegular(this, SymName); 460 else 461 Sym = Symtab->addUndefined(SymName, this, false); 462 } else { 463 Sym = Symtab->addRegular(this, SymName); 464 } 465 SymbolBodies.push_back(Sym); 466 } 467 Directives = Obj->getCOFFLinkerOpts(); 468} 469 470MachineTypes BitcodeFile::getMachineType() { 471 switch (Triple(Obj->getTargetTriple()).getArch()) { 472 case Triple::x86_64: 473 return AMD64; 474 case Triple::x86: 475 return I386; 476 case Triple::arm: 477 return ARMNT; 478 case Triple::aarch64: 479 return ARM64; 480 default: 481 return IMAGE_FILE_MACHINE_UNKNOWN; 482 } 483} 484} // namespace coff 485} // 
namespace lld 486 487// Returns the last element of a path, which is supposed to be a filename. 488static StringRef getBasename(StringRef Path) { 489 size_t Pos = Path.find_last_of("\\/"); 490 if (Pos == StringRef::npos) 491 return Path; 492 return Path.substr(Pos + 1); 493} 494 495// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)". 496std::string lld::toString(const coff::InputFile *File) { 497 if (!File) 498 return "<internal>"; 499 if (File->ParentName.empty()) 500 return File->getName(); 501 502 return (getBasename(File->ParentName) + "(" + getBasename(File->getName()) + 503 ")") 504 .str(); 505} 506