InputFiles.cpp revision 353358
1//===- InputFiles.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "InputFiles.h"
10#include "Chunks.h"
11#include "Config.h"
12#include "DebugTypes.h"
13#include "Driver.h"
14#include "SymbolTable.h"
15#include "Symbols.h"
16#include "lld/Common/ErrorHandler.h"
17#include "lld/Common/Memory.h"
18#include "llvm-c/lto.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/ADT/Triple.h"
21#include "llvm/ADT/Twine.h"
22#include "llvm/BinaryFormat/COFF.h"
23#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
24#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
25#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
26#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
27#include "llvm/Object/Binary.h"
28#include "llvm/Object/COFF.h"
29#include "llvm/Support/Casting.h"
30#include "llvm/Support/Endian.h"
31#include "llvm/Support/Error.h"
32#include "llvm/Support/ErrorOr.h"
33#include "llvm/Support/FileSystem.h"
34#include "llvm/Support/Path.h"
35#include "llvm/Target/TargetOptions.h"
36#include <cstring>
37#include <system_error>
38#include <utility>
39
40using namespace llvm;
41using namespace llvm::COFF;
42using namespace llvm::codeview;
43using namespace llvm::object;
44using namespace llvm::support::endian;
45
46using llvm::Triple;
47using llvm::support::ulittle32_t;
48
49namespace lld {
50namespace coff {
51
// Out-of-class definitions of the static `instances` vectors belonging to
// ObjFile, ImportFile and BitcodeFile. Nothing in this part of the file
// populates them — presumably each file registers itself elsewhere; confirm
// at the insertion sites.
std::vector<ObjFile *> ObjFile::instances;
std::vector<ImportFile *> ImportFile::instances;
std::vector<BitcodeFile *> BitcodeFile::instances;
55
56/// Checks that Source is compatible with being a weak alias to Target.
57/// If Source is Undefined and has no weak alias set, makes it a weak
58/// alias to Target.
59static void checkAndSetWeakAlias(SymbolTable *symtab, InputFile *f,
60                                 Symbol *source, Symbol *target) {
61  if (auto *u = dyn_cast<Undefined>(source)) {
62    if (u->weakAlias && u->weakAlias != target) {
63      // Weak aliases as produced by GCC are named in the form
64      // .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name
65      // of another symbol emitted near the weak symbol.
66      // Just use the definition from the first object file that defined
67      // this weak symbol.
68      if (config->mingw)
69        return;
70      symtab->reportDuplicate(source, f);
71    }
72    u->weakAlias = target;
73  }
74}
75
// Wraps buffer `m` as an input file of kind ArchiveKind. The archive contents
// are not examined here; that happens in parse().
ArchiveFile::ArchiveFile(MemoryBufferRef m) : InputFile(ArchiveKind, m) {}
77
78void ArchiveFile::parse() {
79  // Parse a MemoryBufferRef as an archive file.
80  file = CHECK(Archive::create(mb), this);
81
82  // Read the symbol table to construct Lazy objects.
83  for (const Archive::Symbol &sym : file->symbols())
84    symtab->addLazy(this, sym);
85}
86
87// Returns a buffer pointing to a member file containing a given symbol.
88void ArchiveFile::addMember(const Archive::Symbol &sym) {
89  const Archive::Child &c =
90      CHECK(sym.getMember(),
91            "could not get the member for symbol " + toCOFFString(sym));
92
93  // Return an empty buffer if we have already returned the same buffer.
94  if (!seen.insert(c.getChildOffset()).second)
95    return;
96
97  driver->enqueueArchiveMember(c, sym, getName());
98}
99
100std::vector<MemoryBufferRef> getArchiveMembers(Archive *file) {
101  std::vector<MemoryBufferRef> v;
102  Error err = Error::success();
103  for (const ErrorOr<Archive::Child> &cOrErr : file->children(err)) {
104    Archive::Child c =
105        CHECK(cOrErr,
106              file->getFileName() + ": could not get the child of the archive");
107    MemoryBufferRef mbref =
108        CHECK(c.getMemoryBufferRef(),
109              file->getFileName() +
110                  ": could not get the buffer for a child of the archive");
111    v.push_back(mbref);
112  }
113  if (err)
114    fatal(file->getFileName() +
115          ": Archive::children failed: " + toString(std::move(err)));
116  return v;
117}
118
119void ObjFile::parse() {
120  // Parse a memory buffer as a COFF file.
121  std::unique_ptr<Binary> bin = CHECK(createBinary(mb), this);
122
123  if (auto *obj = dyn_cast<COFFObjectFile>(bin.get())) {
124    bin.release();
125    coffObj.reset(obj);
126  } else {
127    fatal(toString(this) + " is not a COFF file");
128  }
129
130  // Read section and symbol tables.
131  initializeChunks();
132  initializeSymbols();
133  initializeFlags();
134  initializeDependencies();
135}
136
137const coff_section* ObjFile::getSection(uint32_t i) {
138  const coff_section *sec;
139  if (auto ec = coffObj->getSection(i, sec))
140    fatal("getSection failed: #" + Twine(i) + ": " + ec.message());
141  return sec;
142}
143
// We set SectionChunk pointers in the SparseChunks vector to this value
// temporarily to mark comdat sections as having an unknown resolution. As we
// walk the object file's symbol table, once we visit either a leader symbol or
// an associative section definition together with the parent comdat's leader,
// we set the pointer to either nullptr (to mark the section as discarded) or a
// valid SectionChunk for that section.
//
// This is a sentinel: in the code visible here it is only stored and compared
// against, never dereferenced.
static SectionChunk *const pendingComdat = reinterpret_cast<SectionChunk *>(1);
151
152void ObjFile::initializeChunks() {
153  uint32_t numSections = coffObj->getNumberOfSections();
154  chunks.reserve(numSections);
155  sparseChunks.resize(numSections + 1);
156  for (uint32_t i = 1; i < numSections + 1; ++i) {
157    const coff_section *sec = getSection(i);
158    if (sec->Characteristics & IMAGE_SCN_LNK_COMDAT)
159      sparseChunks[i] = pendingComdat;
160    else
161      sparseChunks[i] = readSection(i, nullptr, "");
162  }
163}
164
165SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
166                                   const coff_aux_section_definition *def,
167                                   StringRef leaderName) {
168  const coff_section *sec = getSection(sectionNumber);
169
170  StringRef name;
171  if (Expected<StringRef> e = coffObj->getSectionName(sec))
172    name = *e;
173  else
174    fatal("getSectionName failed: #" + Twine(sectionNumber) + ": " +
175          toString(e.takeError()));
176
177  if (name == ".drectve") {
178    ArrayRef<uint8_t> data;
179    cantFail(coffObj->getSectionContents(sec, data));
180    directives = StringRef((const char *)data.data(), data.size());
181    return nullptr;
182  }
183
184  if (name == ".llvm_addrsig") {
185    addrsigSec = sec;
186    return nullptr;
187  }
188
189  // Object files may have DWARF debug info or MS CodeView debug info
190  // (or both).
191  //
192  // DWARF sections don't need any special handling from the perspective
193  // of the linker; they are just a data section containing relocations.
194  // We can just link them to complete debug info.
195  //
196  // CodeView needs linker support. We need to interpret debug info,
197  // and then write it to a separate .pdb file.
198
199  // Ignore DWARF debug info unless /debug is given.
200  if (!config->debug && name.startswith(".debug_"))
201    return nullptr;
202
203  if (sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
204    return nullptr;
205  auto *c = make<SectionChunk>(this, sec);
206  if (def)
207    c->checksum = def->CheckSum;
208
209  // link.exe uses the presence of .rsrc$01 for LNK4078, so match that.
210  if (name == ".rsrc$01")
211    isResourceObjFile = true;
212
213  // CodeView sections are stored to a different vector because they are not
214  // linked in the regular manner.
215  if (c->isCodeView())
216    debugChunks.push_back(c);
217  else if (name == ".gfids$y")
218    guardFidChunks.push_back(c);
219  else if (name == ".gljmp$y")
220    guardLJmpChunks.push_back(c);
221  else if (name == ".sxdata")
222    sXDataChunks.push_back(c);
223  else if (config->tailMerge && sec->NumberOfRelocations == 0 &&
224           name == ".rdata" && leaderName.startswith("??_C@"))
225    // COFF sections that look like string literal sections (i.e. no
226    // relocations, in .rdata, leader symbol name matches the MSVC name mangling
227    // for string literals) are subject to string tail merging.
228    MergeChunk::addSection(c);
229  else
230    chunks.push_back(c);
231
232  return c;
233}
234
235void ObjFile::readAssociativeDefinition(
236    COFFSymbolRef sym, const coff_aux_section_definition *def) {
237  readAssociativeDefinition(sym, def, def->getNumber(sym.isBigObj()));
238}
239
240void ObjFile::readAssociativeDefinition(COFFSymbolRef sym,
241                                        const coff_aux_section_definition *def,
242                                        uint32_t parentIndex) {
243  SectionChunk *parent = sparseChunks[parentIndex];
244  int32_t sectionNumber = sym.getSectionNumber();
245
246  auto diag = [&]() {
247    StringRef name, parentName;
248    coffObj->getSymbolName(sym, name);
249
250    const coff_section *parentSec = getSection(parentIndex);
251    if (Expected<StringRef> e = coffObj->getSectionName(parentSec))
252      parentName = *e;
253    error(toString(this) + ": associative comdat " + name + " (sec " +
254          Twine(sectionNumber) + ") has invalid reference to section " +
255          parentName + " (sec " + Twine(parentIndex) + ")");
256  };
257
258  if (parent == pendingComdat) {
259    // This can happen if an associative comdat refers to another associative
260    // comdat that appears after it (invalid per COFF spec) or to a section
261    // without any symbols.
262    diag();
263    return;
264  }
265
266  // Check whether the parent is prevailing. If it is, so are we, and we read
267  // the section; otherwise mark it as discarded.
268  if (parent) {
269    SectionChunk *c = readSection(sectionNumber, def, "");
270    sparseChunks[sectionNumber] = c;
271    if (c) {
272      c->selection = IMAGE_COMDAT_SELECT_ASSOCIATIVE;
273      parent->addAssociative(c);
274    }
275  } else {
276    sparseChunks[sectionNumber] = nullptr;
277  }
278}
279
280void ObjFile::recordPrevailingSymbolForMingw(
281    COFFSymbolRef sym, DenseMap<StringRef, uint32_t> &prevailingSectionMap) {
282  // For comdat symbols in executable sections, where this is the copy
283  // of the section chunk we actually include instead of discarding it,
284  // add the symbol to a map to allow using it for implicitly
285  // associating .[px]data$<func> sections to it.
286  int32_t sectionNumber = sym.getSectionNumber();
287  SectionChunk *sc = sparseChunks[sectionNumber];
288  if (sc && sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) {
289    StringRef name;
290    coffObj->getSymbolName(sym, name);
291    if (getMachineType() == I386)
292      name.consume_front("_");
293    prevailingSectionMap[name] = sectionNumber;
294  }
295}
296
297void ObjFile::maybeAssociateSEHForMingw(
298    COFFSymbolRef sym, const coff_aux_section_definition *def,
299    const DenseMap<StringRef, uint32_t> &prevailingSectionMap) {
300  StringRef name;
301  coffObj->getSymbolName(sym, name);
302  if (name.consume_front(".pdata$") || name.consume_front(".xdata$") ||
303      name.consume_front(".eh_frame$")) {
304    // For MinGW, treat .[px]data$<func> and .eh_frame$<func> as implicitly
305    // associative to the symbol <func>.
306    auto parentSym = prevailingSectionMap.find(name);
307    if (parentSym != prevailingSectionMap.end())
308      readAssociativeDefinition(sym, def, parentSym->second);
309  }
310}
311
312Symbol *ObjFile::createRegular(COFFSymbolRef sym) {
313  SectionChunk *sc = sparseChunks[sym.getSectionNumber()];
314  if (sym.isExternal()) {
315    StringRef name;
316    coffObj->getSymbolName(sym, name);
317    if (sc)
318      return symtab->addRegular(this, name, sym.getGeneric(), sc);
319    // For MinGW symbols named .weak.* that point to a discarded section,
320    // don't create an Undefined symbol. If nothing ever refers to the symbol,
321    // everything should be fine. If something actually refers to the symbol
322    // (e.g. the undefined weak alias), linking will fail due to undefined
323    // references at the end.
324    if (config->mingw && name.startswith(".weak."))
325      return nullptr;
326    return symtab->addUndefined(name, this, false);
327  }
328  if (sc)
329    return make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false,
330                                /*IsExternal*/ false, sym.getGeneric(), sc);
331  return nullptr;
332}
333
// Builds the `symbols` vector (indexed by COFF symbol-table index) in three
// passes:
//   1. walk the symbol table, creating undefined, weak-external and defined
//      symbols, and deferring symbols whose comdat section is still pending;
//   2. revisit the deferred symbols once all leaders have been seen;
//   3. wire up weak aliases, now that every target symbol exists.
void ObjFile::initializeSymbols() {
  uint32_t numSymbols = coffObj->getNumberOfSymbols();
  symbols.resize(numSymbols);

  // (weak symbol, symbol-table index of its fallback target) pairs.
  SmallVector<std::pair<Symbol *, uint32_t>, 8> weakAliases;
  // Symbol-table indexes whose handling is postponed to the second pass.
  std::vector<uint32_t> pendingIndexes;
  pendingIndexes.reserve(numSymbols);

  // MinGW only: names of prevailing comdat symbols in executable sections,
  // mapped to their section number.
  DenseMap<StringRef, uint32_t> prevailingSectionMap;
  // Per-section slot holding the aux section definition between visiting a
  // comdat's section symbol and visiting its leader (see createDefined()).
  std::vector<const coff_aux_section_definition *> comdatDefs(
      coffObj->getNumberOfSections() + 1);

  for (uint32_t i = 0; i < numSymbols; ++i) {
    COFFSymbolRef coffSym = check(coffObj->getSymbol(i));
    // Written by createDefined(); only read in the branch that calls it.
    bool prevailingComdat;
    if (coffSym.isUndefined()) {
      symbols[i] = createUndefined(coffSym);
    } else if (coffSym.isWeakExternal()) {
      symbols[i] = createUndefined(coffSym);
      // The aux record names the fallback symbol by symbol-table index.
      uint32_t tagIndex = coffSym.getAux<coff_aux_weak_external>()->TagIndex;
      weakAliases.emplace_back(symbols[i], tagIndex);
    } else if (Optional<Symbol *> optSym =
                   createDefined(coffSym, comdatDefs, prevailingComdat)) {
      symbols[i] = *optSym;
      if (config->mingw && prevailingComdat)
        recordPrevailingSymbolForMingw(coffSym, prevailingSectionMap);
    } else {
      // createDefined() returns None if a symbol belongs to a section that
      // was pending at the point when the symbol was read. This can happen in
      // two cases:
      // 1) section definition symbol for a comdat leader;
      // 2) symbol belongs to a comdat section associated with another section.
      // In both of these cases, we can expect the section to be resolved by
      // the time we finish visiting the remaining symbols in the symbol
      // table. So we postpone the handling of this symbol until that time.
      pendingIndexes.push_back(i);
    }
    // Skip the auxiliary records that follow this symbol.
    i += coffSym.getNumberOfAuxSymbols();
  }

  for (uint32_t i : pendingIndexes) {
    COFFSymbolRef sym = check(coffObj->getSymbol(i));
    if (const coff_aux_section_definition *def = sym.getSectionDefinition()) {
      if (def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
        readAssociativeDefinition(sym, def);
      else if (config->mingw)
        maybeAssociateSEHForMingw(sym, def, prevailingSectionMap);
    }
    // Still unresolved after the steps above: nothing claimed this section.
    if (sparseChunks[sym.getSectionNumber()] == pendingComdat) {
      StringRef name;
      coffObj->getSymbolName(sym, name);
      log("comdat section " + name +
          " without leader and unassociated, discarding");
      continue;
    }
    symbols[i] = createRegular(sym);
  }

  // Resolve each weak symbol's fallback now that symbols[] is fully populated.
  for (auto &kv : weakAliases) {
    Symbol *sym = kv.first;
    uint32_t idx = kv.second;
    checkAndSetWeakAlias(symtab, this, sym, symbols[idx]);
  }
}
398
399Symbol *ObjFile::createUndefined(COFFSymbolRef sym) {
400  StringRef name;
401  coffObj->getSymbolName(sym, name);
402  return symtab->addUndefined(name, this, sym.isWeakExternal());
403}
404
// Reconciles a newly seen comdat symbol `sym` with the already existing
// comdat `leader` of the same name, according to their selection types.
//
// Does nothing if `prevailing` is already true. Otherwise it may report a
// duplicate symbol, and for IMAGE_COMDAT_SELECT_LARGEST it may replace
// `leader` with the new symbol and set `prevailing` to true. `selection` is
// in/out: it is rewritten to "any" when the leader carries no section data,
// and to "largest" when "any" and "largest" are merged.
void ObjFile::handleComdatSelection(COFFSymbolRef sym, COMDATType &selection,
                                    bool &prevailing, DefinedRegular *leader) {
  if (prevailing)
    return;
  // There's already an existing comdat for this symbol: `Leader`.
  // Use the comdat's selection field to determine if the new
  // symbol in `Sym` should be discarded, produce a duplicate symbol
  // error, etc.

  SectionChunk *leaderChunk = nullptr;
  COMDATType leaderSelection = IMAGE_COMDAT_SELECT_ANY;

  if (leader->data) {
    leaderChunk = leader->getChunk();
    leaderSelection = leaderChunk->selection;
  } else {
    // FIXME: comdats from LTO files don't know their selection; treat them
    // as "any".
    selection = leaderSelection;
  }

  if ((selection == IMAGE_COMDAT_SELECT_ANY &&
       leaderSelection == IMAGE_COMDAT_SELECT_LARGEST) ||
      (selection == IMAGE_COMDAT_SELECT_LARGEST &&
       leaderSelection == IMAGE_COMDAT_SELECT_ANY)) {
    // cl.exe picks "any" for vftables when building with /GR- and
    // "largest" when building with /GR. To be able to link object files
    // compiled with each flag, "any" and "largest" are merged as "largest".
    leaderSelection = selection = IMAGE_COMDAT_SELECT_LARGEST;
  }

  // Other than that, comdat selections must match.  This is a bit more
  // strict than link.exe which allows merging "any" and "largest" if "any"
  // is the first symbol the linker sees, and it allows merging "largest"
  // with everything (!) if "largest" is the first symbol the linker sees.
  // Making this symmetric independent of which selection is seen first
  // seems better though.
  // (This behavior matches ModuleLinker::getComdatResult().)
  if (selection != leaderSelection) {
    log(("conflicting comdat type for " + toString(*leader) + ": " +
         Twine((int)leaderSelection) + " in " + toString(leader->getFile()) +
         " and " + Twine((int)selection) + " in " + toString(this))
            .str());
    symtab->reportDuplicate(leader, this);
    return;
  }

  // From here on both sides agree on the selection type.
  switch (selection) {
  case IMAGE_COMDAT_SELECT_NODUPLICATES:
    // Any second definition is an error.
    symtab->reportDuplicate(leader, this);
    break;

  case IMAGE_COMDAT_SELECT_ANY:
    // Nothing to do.
    break;

  case IMAGE_COMDAT_SELECT_SAME_SIZE:
    // Sizes must agree; contents are not compared.
    if (leaderChunk->getSize() != getSection(sym)->SizeOfRawData)
      symtab->reportDuplicate(leader, this);
    break;

  case IMAGE_COMDAT_SELECT_EXACT_MATCH: {
    // Temporary chunk, built only to obtain the new section's contents.
    SectionChunk newChunk(this, getSection(sym));
    // link.exe only compares section contents here and doesn't complain
    // if the two comdat sections have e.g. different alignment.
    // Match that.
    if (leaderChunk->getContents() != newChunk.getContents())
      symtab->reportDuplicate(leader, this);
    break;
  }

  case IMAGE_COMDAT_SELECT_ASSOCIATIVE:
    // createDefined() is never called for IMAGE_COMDAT_SELECT_ASSOCIATIVE.
    // (This means lld-link doesn't produce duplicate symbol errors for
    // associative comdats while link.exe does, but associative comdats
    // are never extern in practice.)
    llvm_unreachable("createDefined not called for associative comdats");

  case IMAGE_COMDAT_SELECT_LARGEST:
    if (leaderChunk->getSize() < getSection(sym)->SizeOfRawData) {
      // Replace the existing comdat symbol with the new one.
      StringRef name;
      coffObj->getSymbolName(sym, name);
      // FIXME: This is incorrect: With /opt:noref, the previous sections
      // make it into the final executable as well. Correct handling would
      // be to undo reading of the whole old section that's being replaced,
      // or doing one pass that determines what the final largest comdat
      // is for all IMAGE_COMDAT_SELECT_LARGEST comdats and then reading
      // only the largest one.
      replaceSymbol<DefinedRegular>(leader, this, name, /*IsCOMDAT*/ true,
                                    /*IsExternal*/ true, sym.getGeneric(),
                                    nullptr);
      prevailing = true;
    }
    break;

  case IMAGE_COMDAT_SELECT_NEWEST:
    llvm_unreachable("should have been rejected earlier");
  }
}
505
506Optional<Symbol *> ObjFile::createDefined(
507    COFFSymbolRef sym,
508    std::vector<const coff_aux_section_definition *> &comdatDefs,
509    bool &prevailing) {
510  prevailing = false;
511  auto getName = [&]() {
512    StringRef s;
513    coffObj->getSymbolName(sym, s);
514    return s;
515  };
516
517  if (sym.isCommon()) {
518    auto *c = make<CommonChunk>(sym);
519    chunks.push_back(c);
520    return symtab->addCommon(this, getName(), sym.getValue(), sym.getGeneric(),
521                             c);
522  }
523
524  if (sym.isAbsolute()) {
525    StringRef name = getName();
526
527    // Skip special symbols.
528    if (name == "@comp.id")
529      return nullptr;
530    if (name == "@feat.00") {
531      feat00Flags = sym.getValue();
532      return nullptr;
533    }
534
535    if (sym.isExternal())
536      return symtab->addAbsolute(name, sym);
537    return make<DefinedAbsolute>(name, sym);
538  }
539
540  int32_t sectionNumber = sym.getSectionNumber();
541  if (sectionNumber == llvm::COFF::IMAGE_SYM_DEBUG)
542    return nullptr;
543
544  if (llvm::COFF::isReservedSectionNumber(sectionNumber))
545    fatal(toString(this) + ": " + getName() +
546          " should not refer to special section " + Twine(sectionNumber));
547
548  if ((uint32_t)sectionNumber >= sparseChunks.size())
549    fatal(toString(this) + ": " + getName() +
550          " should not refer to non-existent section " + Twine(sectionNumber));
551
552  // Comdat handling.
553  // A comdat symbol consists of two symbol table entries.
554  // The first symbol entry has the name of the section (e.g. .text), fixed
555  // values for the other fields, and one auxilliary record.
556  // The second symbol entry has the name of the comdat symbol, called the
557  // "comdat leader".
558  // When this function is called for the first symbol entry of a comdat,
559  // it sets comdatDefs and returns None, and when it's called for the second
560  // symbol entry it reads comdatDefs and then sets it back to nullptr.
561
562  // Handle comdat leader.
563  if (const coff_aux_section_definition *def = comdatDefs[sectionNumber]) {
564    comdatDefs[sectionNumber] = nullptr;
565    DefinedRegular *leader;
566
567    if (sym.isExternal()) {
568      std::tie(leader, prevailing) =
569          symtab->addComdat(this, getName(), sym.getGeneric());
570    } else {
571      leader = make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false,
572                                    /*IsExternal*/ false, sym.getGeneric());
573      prevailing = true;
574    }
575
576    if (def->Selection < (int)IMAGE_COMDAT_SELECT_NODUPLICATES ||
577        // Intentionally ends at IMAGE_COMDAT_SELECT_LARGEST: link.exe
578        // doesn't understand IMAGE_COMDAT_SELECT_NEWEST either.
579        def->Selection > (int)IMAGE_COMDAT_SELECT_LARGEST) {
580      fatal("unknown comdat type " + std::to_string((int)def->Selection) +
581            " for " + getName() + " in " + toString(this));
582    }
583    COMDATType selection = (COMDATType)def->Selection;
584
585    if (leader->isCOMDAT)
586      handleComdatSelection(sym, selection, prevailing, leader);
587
588    if (prevailing) {
589      SectionChunk *c = readSection(sectionNumber, def, getName());
590      sparseChunks[sectionNumber] = c;
591      c->sym = cast<DefinedRegular>(leader);
592      c->selection = selection;
593      cast<DefinedRegular>(leader)->data = &c->repl;
594    } else {
595      sparseChunks[sectionNumber] = nullptr;
596    }
597    return leader;
598  }
599
600  // Prepare to handle the comdat leader symbol by setting the section's
601  // ComdatDefs pointer if we encounter a non-associative comdat.
602  if (sparseChunks[sectionNumber] == pendingComdat) {
603    if (const coff_aux_section_definition *def = sym.getSectionDefinition()) {
604      if (def->Selection != IMAGE_COMDAT_SELECT_ASSOCIATIVE)
605        comdatDefs[sectionNumber] = def;
606    }
607    return None;
608  }
609
610  return createRegular(sym);
611}
612
613MachineTypes ObjFile::getMachineType() {
614  if (coffObj)
615    return static_cast<MachineTypes>(coffObj->getMachine());
616  return IMAGE_FILE_MACHINE_UNKNOWN;
617}
618
619ArrayRef<uint8_t> ObjFile::getDebugSection(StringRef secName) {
620  if (SectionChunk *sec = SectionChunk::findByName(debugChunks, secName))
621    return sec->consumeDebugMagic();
622  return {};
623}
624
625// OBJ files systematically store critical informations in a .debug$S stream,
626// even if the TU was compiled with no debug info. At least two records are
627// always there. S_OBJNAME stores a 32-bit signature, which is loaded into the
628// PCHSignature member. S_COMPILE3 stores compile-time cmd-line flags. This is
629// currently used to initialize the hotPatchable member.
630void ObjFile::initializeFlags() {
631  ArrayRef<uint8_t> data = getDebugSection(".debug$S");
632  if (data.empty())
633    return;
634
635  DebugSubsectionArray subsections;
636
637  BinaryStreamReader reader(data, support::little);
638  ExitOnError exitOnErr;
639  exitOnErr(reader.readArray(subsections, data.size()));
640
641  for (const DebugSubsectionRecord &ss : subsections) {
642    if (ss.kind() != DebugSubsectionKind::Symbols)
643      continue;
644
645    unsigned offset = 0;
646
647    // Only parse the first two records. We are only looking for S_OBJNAME
648    // and S_COMPILE3, and they usually appear at the beginning of the
649    // stream.
650    for (unsigned i = 0; i < 2; ++i) {
651      Expected<CVSymbol> sym = readSymbolFromStream(ss.getRecordData(), offset);
652      if (!sym) {
653        consumeError(sym.takeError());
654        return;
655      }
656      if (sym->kind() == SymbolKind::S_COMPILE3) {
657        auto cs =
658            cantFail(SymbolDeserializer::deserializeAs<Compile3Sym>(sym.get()));
659        hotPatchable =
660            (cs.Flags & CompileSym3Flags::HotPatch) != CompileSym3Flags::None;
661      }
662      if (sym->kind() == SymbolKind::S_OBJNAME) {
663        auto objName = cantFail(SymbolDeserializer::deserializeAs<ObjNameSym>(
664            sym.get()));
665        pchSignature = objName.Signature;
666      }
667      offset += sym->length();
668    }
669  }
670}
671
672// Depending on the compilation flags, OBJs can refer to external files,
673// necessary to merge this OBJ into the final PDB. We currently support two
674// types of external files: Precomp/PCH OBJs, when compiling with /Yc and /Yu.
675// And PDB type servers, when compiling with /Zi. This function extracts these
676// dependencies and makes them available as a TpiSource interface (see
677// DebugTypes.h). Both cases only happen with cl.exe: clang-cl produces regular
678// output even with /Yc and /Yu and with /Zi.
679void ObjFile::initializeDependencies() {
680  if (!config->debug)
681    return;
682
683  bool isPCH = false;
684
685  ArrayRef<uint8_t> data = getDebugSection(".debug$P");
686  if (!data.empty())
687    isPCH = true;
688  else
689    data = getDebugSection(".debug$T");
690
691  if (data.empty())
692    return;
693
694  CVTypeArray types;
695  BinaryStreamReader reader(data, support::little);
696  cantFail(reader.readArray(types, reader.getLength()));
697
698  CVTypeArray::Iterator firstType = types.begin();
699  if (firstType == types.end())
700    return;
701
702  debugTypes.emplace(types);
703
704  if (isPCH) {
705    debugTypesObj = makePrecompSource(this);
706    return;
707  }
708
709  if (firstType->kind() == LF_TYPESERVER2) {
710    TypeServer2Record ts = cantFail(
711        TypeDeserializer::deserializeAs<TypeServer2Record>(firstType->data()));
712    debugTypesObj = makeUseTypeServerSource(this, &ts);
713    return;
714  }
715
716  if (firstType->kind() == LF_PRECOMP) {
717    PrecompRecord precomp = cantFail(
718        TypeDeserializer::deserializeAs<PrecompRecord>(firstType->data()));
719    debugTypesObj = makeUsePrecompSource(this, &precomp);
720    return;
721  }
722
723  debugTypesObj = makeTpiSource(this);
724}
725
726StringRef ltrim1(StringRef s, const char *chars) {
727  if (!s.empty() && strchr(chars, s[0]))
728    return s.substr(1);
729  return s;
730}
731
732void ImportFile::parse() {
733  const char *buf = mb.getBufferStart();
734  const auto *hdr = reinterpret_cast<const coff_import_header *>(buf);
735
736  // Check if the total size is valid.
737  if (mb.getBufferSize() != sizeof(*hdr) + hdr->SizeOfData)
738    fatal("broken import library");
739
740  // Read names and create an __imp_ symbol.
741  StringRef name = saver.save(StringRef(buf + sizeof(*hdr)));
742  StringRef impName = saver.save("__imp_" + name);
743  const char *nameStart = buf + sizeof(coff_import_header) + name.size() + 1;
744  dllName = StringRef(nameStart);
745  StringRef extName;
746  switch (hdr->getNameType()) {
747  case IMPORT_ORDINAL:
748    extName = "";
749    break;
750  case IMPORT_NAME:
751    extName = name;
752    break;
753  case IMPORT_NAME_NOPREFIX:
754    extName = ltrim1(name, "?@_");
755    break;
756  case IMPORT_NAME_UNDECORATE:
757    extName = ltrim1(name, "?@_");
758    extName = extName.substr(0, extName.find('@'));
759    break;
760  }
761
762  this->hdr = hdr;
763  externalName = extName;
764
765  impSym = symtab->addImportData(impName, this);
766  // If this was a duplicate, we logged an error but may continue;
767  // in this case, impSym is nullptr.
768  if (!impSym)
769    return;
770
771  if (hdr->getType() == llvm::COFF::IMPORT_CONST)
772    static_cast<void>(symtab->addImportData(name, this));
773
774  // If type is function, we need to create a thunk which jump to an
775  // address pointed by the __imp_ symbol. (This allows you to call
776  // DLL functions just like regular non-DLL functions.)
777  if (hdr->getType() == llvm::COFF::IMPORT_CODE)
778    thunkSym = symtab->addImportThunk(
779        name, cast_or_null<DefinedImportData>(impSym), hdr->Machine);
780}
781
782BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
783                         uint64_t offsetInArchive)
784    : InputFile(BitcodeKind, mb) {
785  std::string path = mb.getBufferIdentifier().str();
786  if (config->thinLTOIndexOnly)
787    path = replaceThinLTOSuffix(mb.getBufferIdentifier());
788
789  // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
790  // name. If two archives define two members with the same name, this
791  // causes a collision which result in only one of the objects being taken
792  // into consideration at LTO time (which very likely causes undefined
793  // symbols later in the link stage). So we append file offset to make
794  // filename unique.
795  MemoryBufferRef mbref(
796      mb.getBuffer(),
797      saver.save(archiveName + path +
798                 (archiveName.empty() ? "" : utostr(offsetInArchive))));
799
800  obj = check(lto::InputFile::create(mbref));
801}
802
803void BitcodeFile::parse() {
804  std::vector<std::pair<Symbol *, bool>> comdat(obj->getComdatTable().size());
805  for (size_t i = 0; i != obj->getComdatTable().size(); ++i)
806    // FIXME: lto::InputFile doesn't keep enough data to do correct comdat
807    // selection handling.
808    comdat[i] = symtab->addComdat(this, saver.save(obj->getComdatTable()[i]));
809  for (const lto::InputFile::Symbol &objSym : obj->symbols()) {
810    StringRef symName = saver.save(objSym.getName());
811    int comdatIndex = objSym.getComdatIndex();
812    Symbol *sym;
813    if (objSym.isUndefined()) {
814      sym = symtab->addUndefined(symName, this, false);
815    } else if (objSym.isCommon()) {
816      sym = symtab->addCommon(this, symName, objSym.getCommonSize());
817    } else if (objSym.isWeak() && objSym.isIndirect()) {
818      // Weak external.
819      sym = symtab->addUndefined(symName, this, true);
820      std::string fallback = objSym.getCOFFWeakExternalFallback();
821      Symbol *alias = symtab->addUndefined(saver.save(fallback));
822      checkAndSetWeakAlias(symtab, this, sym, alias);
823    } else if (comdatIndex != -1) {
824      if (symName == obj->getComdatTable()[comdatIndex])
825        sym = comdat[comdatIndex].first;
826      else if (comdat[comdatIndex].second)
827        sym = symtab->addRegular(this, symName);
828      else
829        sym = symtab->addUndefined(symName, this, false);
830    } else {
831      sym = symtab->addRegular(this, symName);
832    }
833    symbols.push_back(sym);
834    if (objSym.isUsed())
835      config->gcroot.push_back(sym);
836  }
837  directives = obj->getCOFFLinkerOpts();
838}
839
840MachineTypes BitcodeFile::getMachineType() {
841  switch (Triple(obj->getTargetTriple()).getArch()) {
842  case Triple::x86_64:
843    return AMD64;
844  case Triple::x86:
845    return I386;
846  case Triple::arm:
847    return ARMNT;
848  case Triple::aarch64:
849    return ARM64;
850  default:
851    return IMAGE_FILE_MACHINE_UNKNOWN;
852  }
853}
854
855std::string replaceThinLTOSuffix(StringRef path) {
856  StringRef suffix = config->thinLTOObjectSuffixReplace.first;
857  StringRef repl = config->thinLTOObjectSuffixReplace.second;
858
859  if (path.consume_back(suffix))
860    return (path + repl).str();
861  return path;
862}
863} // namespace coff
864} // namespace lld
865
866// Returns the last element of a path, which is supposed to be a filename.
867static StringRef getBasename(StringRef path) {
868  return sys::path::filename(path, sys::path::Style::windows);
869}
870
871// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)".
872std::string lld::toString(const coff::InputFile *file) {
873  if (!file)
874    return "<internal>";
875  if (file->parentName.empty() || file->kind() == coff::InputFile::ImportKind)
876    return file->getName();
877
878  return (getBasename(file->parentName) + "(" + getBasename(file->getName()) +
879          ")")
880      .str();
881}
882