InputFiles.cpp revision 341825
1//===- InputFiles.cpp -----------------------------------------------------===//
2//
3//                             The LLVM Linker
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "InputFiles.h"
11#include "Chunks.h"
12#include "Config.h"
13#include "Driver.h"
14#include "SymbolTable.h"
15#include "Symbols.h"
16#include "lld/Common/ErrorHandler.h"
17#include "lld/Common/Memory.h"
18#include "llvm-c/lto.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/ADT/Triple.h"
21#include "llvm/ADT/Twine.h"
22#include "llvm/BinaryFormat/COFF.h"
23#include "llvm/Object/Binary.h"
24#include "llvm/Object/COFF.h"
25#include "llvm/Support/Casting.h"
26#include "llvm/Support/Endian.h"
27#include "llvm/Support/Error.h"
28#include "llvm/Support/ErrorOr.h"
29#include "llvm/Support/FileSystem.h"
30#include "llvm/Support/Path.h"
31#include "llvm/Target/TargetOptions.h"
32#include <cstring>
33#include <system_error>
34#include <utility>
35
36using namespace llvm;
37using namespace llvm::COFF;
38using namespace llvm::object;
39using namespace llvm::support::endian;
40
41using llvm::Triple;
42using llvm::support::ulittle32_t;
43
44namespace lld {
45namespace coff {
46
47std::vector<ObjFile *> ObjFile::Instances;
48std::vector<ImportFile *> ImportFile::Instances;
49std::vector<BitcodeFile *> BitcodeFile::Instances;
50
51/// Checks that Source is compatible with being a weak alias to Target.
52/// If Source is Undefined and has no weak alias set, makes it a weak
53/// alias to Target.
54static void checkAndSetWeakAlias(SymbolTable *Symtab, InputFile *F,
55                                 Symbol *Source, Symbol *Target) {
56  if (auto *U = dyn_cast<Undefined>(Source)) {
57    if (U->WeakAlias && U->WeakAlias != Target)
58      Symtab->reportDuplicate(Source, F);
59    U->WeakAlias = Target;
60  }
61}
62
63ArchiveFile::ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {}
64
65void ArchiveFile::parse() {
66  // Parse a MemoryBufferRef as an archive file.
67  File = CHECK(Archive::create(MB), this);
68
69  // Read the symbol table to construct Lazy objects.
70  for (const Archive::Symbol &Sym : File->symbols())
71    Symtab->addLazy(this, Sym);
72}
73
74// Returns a buffer pointing to a member file containing a given symbol.
75void ArchiveFile::addMember(const Archive::Symbol *Sym) {
76  const Archive::Child &C =
77      CHECK(Sym->getMember(),
78            "could not get the member for symbol " + Sym->getName());
79
80  // Return an empty buffer if we have already returned the same buffer.
81  if (!Seen.insert(C.getChildOffset()).second)
82    return;
83
84  Driver->enqueueArchiveMember(C, Sym->getName(), getName());
85}
86
87std::vector<MemoryBufferRef> getArchiveMembers(Archive *File) {
88  std::vector<MemoryBufferRef> V;
89  Error Err = Error::success();
90  for (const ErrorOr<Archive::Child> &COrErr : File->children(Err)) {
91    Archive::Child C =
92        CHECK(COrErr,
93              File->getFileName() + ": could not get the child of the archive");
94    MemoryBufferRef MBRef =
95        CHECK(C.getMemoryBufferRef(),
96              File->getFileName() +
97                  ": could not get the buffer for a child of the archive");
98    V.push_back(MBRef);
99  }
100  if (Err)
101    fatal(File->getFileName() +
102          ": Archive::children failed: " + toString(std::move(Err)));
103  return V;
104}
105
106void ObjFile::parse() {
107  // Parse a memory buffer as a COFF file.
108  std::unique_ptr<Binary> Bin = CHECK(createBinary(MB), this);
109
110  if (auto *Obj = dyn_cast<COFFObjectFile>(Bin.get())) {
111    Bin.release();
112    COFFObj.reset(Obj);
113  } else {
114    fatal(toString(this) + " is not a COFF file");
115  }
116
117  // Read section and symbol tables.
118  initializeChunks();
119  initializeSymbols();
120}
121
122// We set SectionChunk pointers in the SparseChunks vector to this value
123// temporarily to mark comdat sections as having an unknown resolution. As we
124// walk the object file's symbol table, once we visit either a leader symbol or
125// an associative section definition together with the parent comdat's leader,
126// we set the pointer to either nullptr (to mark the section as discarded) or a
127// valid SectionChunk for that section.
128static SectionChunk *const PendingComdat = reinterpret_cast<SectionChunk *>(1);
129
130void ObjFile::initializeChunks() {
131  uint32_t NumSections = COFFObj->getNumberOfSections();
132  Chunks.reserve(NumSections);
133  SparseChunks.resize(NumSections + 1);
134  for (uint32_t I = 1; I < NumSections + 1; ++I) {
135    const coff_section *Sec;
136    if (auto EC = COFFObj->getSection(I, Sec))
137      fatal("getSection failed: #" + Twine(I) + ": " + EC.message());
138
139    if (Sec->Characteristics & IMAGE_SCN_LNK_COMDAT)
140      SparseChunks[I] = PendingComdat;
141    else
142      SparseChunks[I] = readSection(I, nullptr, "");
143  }
144}
145
146SectionChunk *ObjFile::readSection(uint32_t SectionNumber,
147                                   const coff_aux_section_definition *Def,
148                                   StringRef LeaderName) {
149  const coff_section *Sec;
150  StringRef Name;
151  if (auto EC = COFFObj->getSection(SectionNumber, Sec))
152    fatal("getSection failed: #" + Twine(SectionNumber) + ": " + EC.message());
153  if (auto EC = COFFObj->getSectionName(Sec, Name))
154    fatal("getSectionName failed: #" + Twine(SectionNumber) + ": " +
155          EC.message());
156
157  if (Name == ".drectve") {
158    ArrayRef<uint8_t> Data;
159    COFFObj->getSectionContents(Sec, Data);
160    Directives = std::string((const char *)Data.data(), Data.size());
161    return nullptr;
162  }
163
164  // Object files may have DWARF debug info or MS CodeView debug info
165  // (or both).
166  //
167  // DWARF sections don't need any special handling from the perspective
168  // of the linker; they are just a data section containing relocations.
169  // We can just link them to complete debug info.
170  //
171  // CodeView needs a linker support. We need to interpret and debug
172  // info, and then write it to a separate .pdb file.
173
174  // Ignore DWARF debug info unless /debug is given.
175  if (!Config->Debug && Name.startswith(".debug_"))
176    return nullptr;
177
178  if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
179    return nullptr;
180  auto *C = make<SectionChunk>(this, Sec);
181  if (Def)
182    C->Checksum = Def->CheckSum;
183
184  // CodeView sections are stored to a different vector because they are not
185  // linked in the regular manner.
186  if (C->isCodeView())
187    DebugChunks.push_back(C);
188  else if (Config->GuardCF != GuardCFLevel::Off && Name == ".gfids$y")
189    GuardFidChunks.push_back(C);
190  else if (Config->GuardCF != GuardCFLevel::Off && Name == ".gljmp$y")
191    GuardLJmpChunks.push_back(C);
192  else if (Name == ".sxdata")
193    SXDataChunks.push_back(C);
194  else if (Config->TailMerge && Sec->NumberOfRelocations == 0 &&
195           Name == ".rdata" && LeaderName.startswith("??_C@"))
196    // COFF sections that look like string literal sections (i.e. no
197    // relocations, in .rdata, leader symbol name matches the MSVC name mangling
198    // for string literals) are subject to string tail merging.
199    MergeChunk::addSection(C);
200  else
201    Chunks.push_back(C);
202
203  return C;
204}
205
206void ObjFile::readAssociativeDefinition(
207    COFFSymbolRef Sym, const coff_aux_section_definition *Def) {
208  readAssociativeDefinition(Sym, Def, Def->getNumber(Sym.isBigObj()));
209}
210
211void ObjFile::readAssociativeDefinition(COFFSymbolRef Sym,
212                                        const coff_aux_section_definition *Def,
213                                        uint32_t ParentSection) {
214  SectionChunk *Parent = SparseChunks[ParentSection];
215
216  // If the parent is pending, it probably means that its section definition
217  // appears after us in the symbol table. Leave the associated section as
218  // pending; we will handle it during the second pass in initializeSymbols().
219  if (Parent == PendingComdat)
220    return;
221
222  // Check whether the parent is prevailing. If it is, so are we, and we read
223  // the section; otherwise mark it as discarded.
224  int32_t SectionNumber = Sym.getSectionNumber();
225  if (Parent) {
226    SparseChunks[SectionNumber] = readSection(SectionNumber, Def, "");
227    if (SparseChunks[SectionNumber])
228      Parent->addAssociative(SparseChunks[SectionNumber]);
229  } else {
230    SparseChunks[SectionNumber] = nullptr;
231  }
232}
233
234void ObjFile::recordPrevailingSymbolForMingw(
235    COFFSymbolRef Sym, DenseMap<StringRef, uint32_t> &PrevailingSectionMap) {
236  // For comdat symbols in executable sections, where this is the copy
237  // of the section chunk we actually include instead of discarding it,
238  // add the symbol to a map to allow using it for implicitly
239  // associating .[px]data$<func> sections to it.
240  int32_t SectionNumber = Sym.getSectionNumber();
241  SectionChunk *SC = SparseChunks[SectionNumber];
242  if (SC && SC->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) {
243    StringRef Name;
244    COFFObj->getSymbolName(Sym, Name);
245    PrevailingSectionMap[Name] = SectionNumber;
246  }
247}
248
249void ObjFile::maybeAssociateSEHForMingw(
250    COFFSymbolRef Sym, const coff_aux_section_definition *Def,
251    const DenseMap<StringRef, uint32_t> &PrevailingSectionMap) {
252  StringRef Name;
253  COFFObj->getSymbolName(Sym, Name);
254  if (Name.consume_front(".pdata$") || Name.consume_front(".xdata$")) {
255    // For MinGW, treat .[px]data$<func> as implicitly associative to
256    // the symbol <func>.
257    auto ParentSym = PrevailingSectionMap.find(Name);
258    if (ParentSym != PrevailingSectionMap.end())
259      readAssociativeDefinition(Sym, Def, ParentSym->second);
260  }
261}
262
263Symbol *ObjFile::createRegular(COFFSymbolRef Sym) {
264  SectionChunk *SC = SparseChunks[Sym.getSectionNumber()];
265  if (Sym.isExternal()) {
266    StringRef Name;
267    COFFObj->getSymbolName(Sym, Name);
268    if (SC)
269      return Symtab->addRegular(this, Name, Sym.getGeneric(), SC);
270    return Symtab->addUndefined(Name, this, false);
271  }
272  if (SC)
273    return make<DefinedRegular>(this, /*Name*/ "", false,
274                                /*IsExternal*/ false, Sym.getGeneric(), SC);
275  return nullptr;
276}
277
278void ObjFile::initializeSymbols() {
279  uint32_t NumSymbols = COFFObj->getNumberOfSymbols();
280  Symbols.resize(NumSymbols);
281
282  SmallVector<std::pair<Symbol *, uint32_t>, 8> WeakAliases;
283  std::vector<uint32_t> PendingIndexes;
284  PendingIndexes.reserve(NumSymbols);
285
286  DenseMap<StringRef, uint32_t> PrevailingSectionMap;
287  std::vector<const coff_aux_section_definition *> ComdatDefs(
288      COFFObj->getNumberOfSections() + 1);
289
290  for (uint32_t I = 0; I < NumSymbols; ++I) {
291    COFFSymbolRef COFFSym = check(COFFObj->getSymbol(I));
292    bool PrevailingComdat;
293    if (COFFSym.isUndefined()) {
294      Symbols[I] = createUndefined(COFFSym);
295    } else if (COFFSym.isWeakExternal()) {
296      Symbols[I] = createUndefined(COFFSym);
297      uint32_t TagIndex = COFFSym.getAux<coff_aux_weak_external>()->TagIndex;
298      WeakAliases.emplace_back(Symbols[I], TagIndex);
299    } else if (Optional<Symbol *> OptSym =
300                   createDefined(COFFSym, ComdatDefs, PrevailingComdat)) {
301      Symbols[I] = *OptSym;
302      if (Config->MinGW && PrevailingComdat)
303        recordPrevailingSymbolForMingw(COFFSym, PrevailingSectionMap);
304    } else {
305      // createDefined() returns None if a symbol belongs to a section that
306      // was pending at the point when the symbol was read. This can happen in
307      // two cases:
308      // 1) section definition symbol for a comdat leader;
309      // 2) symbol belongs to a comdat section associated with a section whose
310      //    section definition symbol appears later in the symbol table.
311      // In both of these cases, we can expect the section to be resolved by
312      // the time we finish visiting the remaining symbols in the symbol
313      // table. So we postpone the handling of this symbol until that time.
314      PendingIndexes.push_back(I);
315    }
316    I += COFFSym.getNumberOfAuxSymbols();
317  }
318
319  for (uint32_t I : PendingIndexes) {
320    COFFSymbolRef Sym = check(COFFObj->getSymbol(I));
321    if (auto *Def = Sym.getSectionDefinition()) {
322      if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
323        readAssociativeDefinition(Sym, Def);
324      else if (Config->MinGW)
325        maybeAssociateSEHForMingw(Sym, Def, PrevailingSectionMap);
326    }
327    if (SparseChunks[Sym.getSectionNumber()] == PendingComdat) {
328      StringRef Name;
329      COFFObj->getSymbolName(Sym, Name);
330      log("comdat section " + Name +
331          " without leader and unassociated, discarding");
332      continue;
333    }
334    Symbols[I] = createRegular(Sym);
335  }
336
337  for (auto &KV : WeakAliases) {
338    Symbol *Sym = KV.first;
339    uint32_t Idx = KV.second;
340    checkAndSetWeakAlias(Symtab, this, Sym, Symbols[Idx]);
341  }
342}
343
344Symbol *ObjFile::createUndefined(COFFSymbolRef Sym) {
345  StringRef Name;
346  COFFObj->getSymbolName(Sym, Name);
347  return Symtab->addUndefined(Name, this, Sym.isWeakExternal());
348}
349
350Optional<Symbol *> ObjFile::createDefined(
351    COFFSymbolRef Sym,
352    std::vector<const coff_aux_section_definition *> &ComdatDefs,
353    bool &Prevailing) {
354  Prevailing = false;
355  auto GetName = [&]() {
356    StringRef S;
357    COFFObj->getSymbolName(Sym, S);
358    return S;
359  };
360
361  if (Sym.isCommon()) {
362    auto *C = make<CommonChunk>(Sym);
363    Chunks.push_back(C);
364    return Symtab->addCommon(this, GetName(), Sym.getValue(), Sym.getGeneric(),
365                             C);
366  }
367
368  if (Sym.isAbsolute()) {
369    StringRef Name = GetName();
370
371    // Skip special symbols.
372    if (Name == "@comp.id")
373      return nullptr;
374    if (Name == "@feat.00") {
375      Feat00Flags = Sym.getValue();
376      return nullptr;
377    }
378
379    if (Sym.isExternal())
380      return Symtab->addAbsolute(Name, Sym);
381    return make<DefinedAbsolute>(Name, Sym);
382  }
383
384  int32_t SectionNumber = Sym.getSectionNumber();
385  if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG)
386    return nullptr;
387
388  if (llvm::COFF::isReservedSectionNumber(SectionNumber))
389    fatal(toString(this) + ": " + GetName() +
390          " should not refer to special section " + Twine(SectionNumber));
391
392  if ((uint32_t)SectionNumber >= SparseChunks.size())
393    fatal(toString(this) + ": " + GetName() +
394          " should not refer to non-existent section " + Twine(SectionNumber));
395
396  // Handle comdat leader symbols.
397  if (const coff_aux_section_definition *Def = ComdatDefs[SectionNumber]) {
398    ComdatDefs[SectionNumber] = nullptr;
399    Symbol *Leader;
400    if (Sym.isExternal()) {
401      std::tie(Leader, Prevailing) =
402          Symtab->addComdat(this, GetName(), Sym.getGeneric());
403    } else {
404      Leader = make<DefinedRegular>(this, /*Name*/ "", false,
405                                    /*IsExternal*/ false, Sym.getGeneric());
406      Prevailing = true;
407    }
408
409    if (Prevailing) {
410      SectionChunk *C = readSection(SectionNumber, Def, GetName());
411      SparseChunks[SectionNumber] = C;
412      C->Sym = cast<DefinedRegular>(Leader);
413      cast<DefinedRegular>(Leader)->Data = &C->Repl;
414    } else {
415      SparseChunks[SectionNumber] = nullptr;
416    }
417    return Leader;
418  }
419
420  // Read associative section definitions and prepare to handle the comdat
421  // leader symbol by setting the section's ComdatDefs pointer if we encounter a
422  // non-associative comdat.
423  if (SparseChunks[SectionNumber] == PendingComdat) {
424    if (auto *Def = Sym.getSectionDefinition()) {
425      if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
426        readAssociativeDefinition(Sym, Def);
427      else
428        ComdatDefs[SectionNumber] = Def;
429    }
430  }
431
432  if (SparseChunks[SectionNumber] == PendingComdat)
433    return None;
434  return createRegular(Sym);
435}
436
437MachineTypes ObjFile::getMachineType() {
438  if (COFFObj)
439    return static_cast<MachineTypes>(COFFObj->getMachine());
440  return IMAGE_FILE_MACHINE_UNKNOWN;
441}
442
443StringRef ltrim1(StringRef S, const char *Chars) {
444  if (!S.empty() && strchr(Chars, S[0]))
445    return S.substr(1);
446  return S;
447}
448
449void ImportFile::parse() {
450  const char *Buf = MB.getBufferStart();
451  const char *End = MB.getBufferEnd();
452  const auto *Hdr = reinterpret_cast<const coff_import_header *>(Buf);
453
454  // Check if the total size is valid.
455  if ((size_t)(End - Buf) != (sizeof(*Hdr) + Hdr->SizeOfData))
456    fatal("broken import library");
457
458  // Read names and create an __imp_ symbol.
459  StringRef Name = Saver.save(StringRef(Buf + sizeof(*Hdr)));
460  StringRef ImpName = Saver.save("__imp_" + Name);
461  const char *NameStart = Buf + sizeof(coff_import_header) + Name.size() + 1;
462  DLLName = StringRef(NameStart);
463  StringRef ExtName;
464  switch (Hdr->getNameType()) {
465  case IMPORT_ORDINAL:
466    ExtName = "";
467    break;
468  case IMPORT_NAME:
469    ExtName = Name;
470    break;
471  case IMPORT_NAME_NOPREFIX:
472    ExtName = ltrim1(Name, "?@_");
473    break;
474  case IMPORT_NAME_UNDECORATE:
475    ExtName = ltrim1(Name, "?@_");
476    ExtName = ExtName.substr(0, ExtName.find('@'));
477    break;
478  }
479
480  this->Hdr = Hdr;
481  ExternalName = ExtName;
482
483  ImpSym = Symtab->addImportData(ImpName, this);
484
485  if (Hdr->getType() == llvm::COFF::IMPORT_CONST)
486    static_cast<void>(Symtab->addImportData(Name, this));
487
488  // If type is function, we need to create a thunk which jump to an
489  // address pointed by the __imp_ symbol. (This allows you to call
490  // DLL functions just like regular non-DLL functions.)
491  if (Hdr->getType() == llvm::COFF::IMPORT_CODE)
492    ThunkSym = Symtab->addImportThunk(
493        Name, cast_or_null<DefinedImportData>(ImpSym), Hdr->Machine);
494}
495
496void BitcodeFile::parse() {
497  Obj = check(lto::InputFile::create(MemoryBufferRef(
498      MB.getBuffer(), Saver.save(ParentName + MB.getBufferIdentifier()))));
499  std::vector<std::pair<Symbol *, bool>> Comdat(Obj->getComdatTable().size());
500  for (size_t I = 0; I != Obj->getComdatTable().size(); ++I)
501    Comdat[I] = Symtab->addComdat(this, Saver.save(Obj->getComdatTable()[I]));
502  for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) {
503    StringRef SymName = Saver.save(ObjSym.getName());
504    int ComdatIndex = ObjSym.getComdatIndex();
505    Symbol *Sym;
506    if (ObjSym.isUndefined()) {
507      Sym = Symtab->addUndefined(SymName, this, false);
508    } else if (ObjSym.isCommon()) {
509      Sym = Symtab->addCommon(this, SymName, ObjSym.getCommonSize());
510    } else if (ObjSym.isWeak() && ObjSym.isIndirect()) {
511      // Weak external.
512      Sym = Symtab->addUndefined(SymName, this, true);
513      std::string Fallback = ObjSym.getCOFFWeakExternalFallback();
514      Symbol *Alias = Symtab->addUndefined(Saver.save(Fallback));
515      checkAndSetWeakAlias(Symtab, this, Sym, Alias);
516    } else if (ComdatIndex != -1) {
517      if (SymName == Obj->getComdatTable()[ComdatIndex])
518        Sym = Comdat[ComdatIndex].first;
519      else if (Comdat[ComdatIndex].second)
520        Sym = Symtab->addRegular(this, SymName);
521      else
522        Sym = Symtab->addUndefined(SymName, this, false);
523    } else {
524      Sym = Symtab->addRegular(this, SymName);
525    }
526    Symbols.push_back(Sym);
527  }
528  Directives = Obj->getCOFFLinkerOpts();
529}
530
531MachineTypes BitcodeFile::getMachineType() {
532  switch (Triple(Obj->getTargetTriple()).getArch()) {
533  case Triple::x86_64:
534    return AMD64;
535  case Triple::x86:
536    return I386;
537  case Triple::arm:
538    return ARMNT;
539  case Triple::aarch64:
540    return ARM64;
541  default:
542    return IMAGE_FILE_MACHINE_UNKNOWN;
543  }
544}
545} // namespace coff
546} // namespace lld
547
548// Returns the last element of a path, which is supposed to be a filename.
549static StringRef getBasename(StringRef Path) {
550  return sys::path::filename(Path, sys::path::Style::windows);
551}
552
553// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)".
554std::string lld::toString(const coff::InputFile *File) {
555  if (!File)
556    return "<internal>";
557  if (File->ParentName.empty())
558    return File->getName();
559
560  return (getBasename(File->ParentName) + "(" + getBasename(File->getName()) +
561          ")")
562      .str();
563}
564