1//===- InputFiles.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "InputFiles.h"
10#include "Chunks.h"
11#include "Config.h"
12#include "DebugTypes.h"
13#include "Driver.h"
14#include "SymbolTable.h"
15#include "Symbols.h"
16#include "lld/Common/DWARF.h"
17#include "lld/Common/ErrorHandler.h"
18#include "lld/Common/Memory.h"
19#include "llvm-c/lto.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/Triple.h"
22#include "llvm/ADT/Twine.h"
23#include "llvm/BinaryFormat/COFF.h"
24#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
25#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
26#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
27#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
28#include "llvm/LTO/LTO.h"
29#include "llvm/Object/Binary.h"
30#include "llvm/Object/COFF.h"
31#include "llvm/Support/Casting.h"
32#include "llvm/Support/Endian.h"
33#include "llvm/Support/Error.h"
34#include "llvm/Support/ErrorOr.h"
35#include "llvm/Support/FileSystem.h"
36#include "llvm/Support/Path.h"
37#include "llvm/Target/TargetOptions.h"
38#include <cstring>
39#include <system_error>
40#include <utility>
41
42using namespace llvm;
43using namespace llvm::COFF;
44using namespace llvm::codeview;
45using namespace llvm::object;
46using namespace llvm::support::endian;
47
48using llvm::Triple;
49using llvm::support::ulittle32_t;
50
51namespace lld {
52
53// Returns the last element of a path, which is supposed to be a filename.
54static StringRef getBasename(StringRef path) {
55  return sys::path::filename(path, sys::path::Style::windows);
56}
57
58// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)".
59std::string toString(const coff::InputFile *file) {
60  if (!file)
61    return "<internal>";
62  if (file->parentName.empty() || file->kind() == coff::InputFile::ImportKind)
63    return file->getName();
64
65  return (getBasename(file->parentName) + "(" + getBasename(file->getName()) +
66          ")")
67      .str();
68}
69
70namespace coff {
71
// Out-of-line definitions of the static `instances` vectors that belong to
// ObjFile, ImportFile and BitcodeFile respectively.
std::vector<ObjFile *> ObjFile::instances;
std::vector<ImportFile *> ImportFile::instances;
std::vector<BitcodeFile *> BitcodeFile::instances;
75
76/// Checks that Source is compatible with being a weak alias to Target.
77/// If Source is Undefined and has no weak alias set, makes it a weak
78/// alias to Target.
79static void checkAndSetWeakAlias(SymbolTable *symtab, InputFile *f,
80                                 Symbol *source, Symbol *target) {
81  if (auto *u = dyn_cast<Undefined>(source)) {
82    if (u->weakAlias && u->weakAlias != target) {
83      // Weak aliases as produced by GCC are named in the form
84      // .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name
85      // of another symbol emitted near the weak symbol.
86      // Just use the definition from the first object file that defined
87      // this weak symbol.
88      if (config->mingw)
89        return;
90      symtab->reportDuplicate(source, f);
91    }
92    u->weakAlias = target;
93  }
94}
95
96static bool ignoredSymbolName(StringRef name) {
97  return name == "@feat.00" || name == "@comp.id";
98}
99
// Constructs an archive input file over the buffer `m`, tagging the InputFile
// base with ArchiveKind.
ArchiveFile::ArchiveFile(MemoryBufferRef m) : InputFile(ArchiveKind, m) {}
101
102void ArchiveFile::parse() {
103  // Parse a MemoryBufferRef as an archive file.
104  file = CHECK(Archive::create(mb), this);
105
106  // Read the symbol table to construct Lazy objects.
107  for (const Archive::Symbol &sym : file->symbols())
108    symtab->addLazyArchive(this, sym);
109}
110
111// Returns a buffer pointing to a member file containing a given symbol.
112void ArchiveFile::addMember(const Archive::Symbol &sym) {
113  const Archive::Child &c =
114      CHECK(sym.getMember(),
115            "could not get the member for symbol " + toCOFFString(sym));
116
117  // Return an empty buffer if we have already returned the same buffer.
118  if (!seen.insert(c.getChildOffset()).second)
119    return;
120
121  driver->enqueueArchiveMember(c, sym, getName());
122}
123
124std::vector<MemoryBufferRef> getArchiveMembers(Archive *file) {
125  std::vector<MemoryBufferRef> v;
126  Error err = Error::success();
127  for (const Archive::Child &c : file->children(err)) {
128    MemoryBufferRef mbref =
129        CHECK(c.getMemoryBufferRef(),
130              file->getFileName() +
131                  ": could not get the buffer for a child of the archive");
132    v.push_back(mbref);
133  }
134  if (err)
135    fatal(file->getFileName() +
136          ": Archive::children failed: " + toString(std::move(err)));
137  return v;
138}
139
140void LazyObjFile::fetch() {
141  if (mb.getBuffer().empty())
142    return;
143
144  InputFile *file;
145  if (isBitcode(mb))
146    file = make<BitcodeFile>(mb, "", 0, std::move(symbols));
147  else
148    file = make<ObjFile>(mb, std::move(symbols));
149  mb = {};
150  symtab->addFile(file);
151}
152
153void LazyObjFile::parse() {
154  if (isBitcode(this->mb)) {
155    // Bitcode file.
156    std::unique_ptr<lto::InputFile> obj =
157        CHECK(lto::InputFile::create(this->mb), this);
158    for (const lto::InputFile::Symbol &sym : obj->symbols()) {
159      if (!sym.isUndefined())
160        symtab->addLazyObject(this, sym.getName());
161    }
162    return;
163  }
164
165  // Native object file.
166  std::unique_ptr<Binary> coffObjPtr = CHECK(createBinary(mb), this);
167  COFFObjectFile *coffObj = cast<COFFObjectFile>(coffObjPtr.get());
168  uint32_t numSymbols = coffObj->getNumberOfSymbols();
169  for (uint32_t i = 0; i < numSymbols; ++i) {
170    COFFSymbolRef coffSym = check(coffObj->getSymbol(i));
171    if (coffSym.isUndefined() || !coffSym.isExternal() ||
172        coffSym.isWeakExternal())
173      continue;
174    StringRef name;
175    coffObj->getSymbolName(coffSym, name);
176    if (coffSym.isAbsolute() && ignoredSymbolName(name))
177      continue;
178    symtab->addLazyObject(this, name);
179    i += coffSym.getNumberOfAuxSymbols();
180  }
181}
182
183void ObjFile::parse() {
184  // Parse a memory buffer as a COFF file.
185  std::unique_ptr<Binary> bin = CHECK(createBinary(mb), this);
186
187  if (auto *obj = dyn_cast<COFFObjectFile>(bin.get())) {
188    bin.release();
189    coffObj.reset(obj);
190  } else {
191    fatal(toString(this) + " is not a COFF file");
192  }
193
194  // Read section and symbol tables.
195  initializeChunks();
196  initializeSymbols();
197  initializeFlags();
198  initializeDependencies();
199}
200
201const coff_section* ObjFile::getSection(uint32_t i) {
202  const coff_section *sec;
203  if (auto ec = coffObj->getSection(i, sec))
204    fatal("getSection failed: #" + Twine(i) + ": " + ec.message());
205  return sec;
206}
207
// We set SectionChunk pointers in the SparseChunks vector to this value
// temporarily to mark comdat sections as having an unknown resolution. As we
// walk the object file's symbol table, once we visit either a leader symbol or
// an associative section definition together with the parent comdat's leader,
// we set the pointer to either nullptr (to mark the section as discarded) or a
// valid SectionChunk for that section.
// The value is the integer 1 cast to a pointer: a marker to compare against,
// not the address of a real chunk.
static SectionChunk *const pendingComdat = reinterpret_cast<SectionChunk *>(1);
215
216void ObjFile::initializeChunks() {
217  uint32_t numSections = coffObj->getNumberOfSections();
218  chunks.reserve(numSections);
219  sparseChunks.resize(numSections + 1);
220  for (uint32_t i = 1; i < numSections + 1; ++i) {
221    const coff_section *sec = getSection(i);
222    if (sec->Characteristics & IMAGE_SCN_LNK_COMDAT)
223      sparseChunks[i] = pendingComdat;
224    else
225      sparseChunks[i] = readSection(i, nullptr, "");
226  }
227}
228
229SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
230                                   const coff_aux_section_definition *def,
231                                   StringRef leaderName) {
232  const coff_section *sec = getSection(sectionNumber);
233
234  StringRef name;
235  if (Expected<StringRef> e = coffObj->getSectionName(sec))
236    name = *e;
237  else
238    fatal("getSectionName failed: #" + Twine(sectionNumber) + ": " +
239          toString(e.takeError()));
240
241  if (name == ".drectve") {
242    ArrayRef<uint8_t> data;
243    cantFail(coffObj->getSectionContents(sec, data));
244    directives = StringRef((const char *)data.data(), data.size());
245    return nullptr;
246  }
247
248  if (name == ".llvm_addrsig") {
249    addrsigSec = sec;
250    return nullptr;
251  }
252
253  // Object files may have DWARF debug info or MS CodeView debug info
254  // (or both).
255  //
256  // DWARF sections don't need any special handling from the perspective
257  // of the linker; they are just a data section containing relocations.
258  // We can just link them to complete debug info.
259  //
260  // CodeView needs linker support. We need to interpret debug info,
261  // and then write it to a separate .pdb file.
262
263  // Ignore DWARF debug info unless /debug is given.
264  if (!config->debug && name.startswith(".debug_"))
265    return nullptr;
266
267  if (sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
268    return nullptr;
269  auto *c = make<SectionChunk>(this, sec);
270  if (def)
271    c->checksum = def->CheckSum;
272
273  // CodeView sections are stored to a different vector because they are not
274  // linked in the regular manner.
275  if (c->isCodeView())
276    debugChunks.push_back(c);
277  else if (name == ".gfids$y")
278    guardFidChunks.push_back(c);
279  else if (name == ".gljmp$y")
280    guardLJmpChunks.push_back(c);
281  else if (name == ".sxdata")
282    sXDataChunks.push_back(c);
283  else if (config->tailMerge && sec->NumberOfRelocations == 0 &&
284           name == ".rdata" && leaderName.startswith("??_C@"))
285    // COFF sections that look like string literal sections (i.e. no
286    // relocations, in .rdata, leader symbol name matches the MSVC name mangling
287    // for string literals) are subject to string tail merging.
288    MergeChunk::addSection(c);
289  else if (name == ".rsrc" || name.startswith(".rsrc$"))
290    resourceChunks.push_back(c);
291  else
292    chunks.push_back(c);
293
294  return c;
295}
296
297void ObjFile::includeResourceChunks() {
298  chunks.insert(chunks.end(), resourceChunks.begin(), resourceChunks.end());
299}
300
301void ObjFile::readAssociativeDefinition(
302    COFFSymbolRef sym, const coff_aux_section_definition *def) {
303  readAssociativeDefinition(sym, def, def->getNumber(sym.isBigObj()));
304}
305
306void ObjFile::readAssociativeDefinition(COFFSymbolRef sym,
307                                        const coff_aux_section_definition *def,
308                                        uint32_t parentIndex) {
309  SectionChunk *parent = sparseChunks[parentIndex];
310  int32_t sectionNumber = sym.getSectionNumber();
311
312  auto diag = [&]() {
313    StringRef name, parentName;
314    coffObj->getSymbolName(sym, name);
315
316    const coff_section *parentSec = getSection(parentIndex);
317    if (Expected<StringRef> e = coffObj->getSectionName(parentSec))
318      parentName = *e;
319    error(toString(this) + ": associative comdat " + name + " (sec " +
320          Twine(sectionNumber) + ") has invalid reference to section " +
321          parentName + " (sec " + Twine(parentIndex) + ")");
322  };
323
324  if (parent == pendingComdat) {
325    // This can happen if an associative comdat refers to another associative
326    // comdat that appears after it (invalid per COFF spec) or to a section
327    // without any symbols.
328    diag();
329    return;
330  }
331
332  // Check whether the parent is prevailing. If it is, so are we, and we read
333  // the section; otherwise mark it as discarded.
334  if (parent) {
335    SectionChunk *c = readSection(sectionNumber, def, "");
336    sparseChunks[sectionNumber] = c;
337    if (c) {
338      c->selection = IMAGE_COMDAT_SELECT_ASSOCIATIVE;
339      parent->addAssociative(c);
340    }
341  } else {
342    sparseChunks[sectionNumber] = nullptr;
343  }
344}
345
346void ObjFile::recordPrevailingSymbolForMingw(
347    COFFSymbolRef sym, DenseMap<StringRef, uint32_t> &prevailingSectionMap) {
348  // For comdat symbols in executable sections, where this is the copy
349  // of the section chunk we actually include instead of discarding it,
350  // add the symbol to a map to allow using it for implicitly
351  // associating .[px]data$<func> sections to it.
352  int32_t sectionNumber = sym.getSectionNumber();
353  SectionChunk *sc = sparseChunks[sectionNumber];
354  if (sc && sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) {
355    StringRef name;
356    coffObj->getSymbolName(sym, name);
357    if (getMachineType() == I386)
358      name.consume_front("_");
359    prevailingSectionMap[name] = sectionNumber;
360  }
361}
362
363void ObjFile::maybeAssociateSEHForMingw(
364    COFFSymbolRef sym, const coff_aux_section_definition *def,
365    const DenseMap<StringRef, uint32_t> &prevailingSectionMap) {
366  StringRef name;
367  coffObj->getSymbolName(sym, name);
368  if (name.consume_front(".pdata$") || name.consume_front(".xdata$") ||
369      name.consume_front(".eh_frame$")) {
370    // For MinGW, treat .[px]data$<func> and .eh_frame$<func> as implicitly
371    // associative to the symbol <func>.
372    auto parentSym = prevailingSectionMap.find(name);
373    if (parentSym != prevailingSectionMap.end())
374      readAssociativeDefinition(sym, def, parentSym->second);
375  }
376}
377
378Symbol *ObjFile::createRegular(COFFSymbolRef sym) {
379  SectionChunk *sc = sparseChunks[sym.getSectionNumber()];
380  if (sym.isExternal()) {
381    StringRef name;
382    coffObj->getSymbolName(sym, name);
383    if (sc)
384      return symtab->addRegular(this, name, sym.getGeneric(), sc,
385                                sym.getValue());
386    // For MinGW symbols named .weak.* that point to a discarded section,
387    // don't create an Undefined symbol. If nothing ever refers to the symbol,
388    // everything should be fine. If something actually refers to the symbol
389    // (e.g. the undefined weak alias), linking will fail due to undefined
390    // references at the end.
391    if (config->mingw && name.startswith(".weak."))
392      return nullptr;
393    return symtab->addUndefined(name, this, false);
394  }
395  if (sc)
396    return make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false,
397                                /*IsExternal*/ false, sym.getGeneric(), sc);
398  return nullptr;
399}
400
// Walks the COFF symbol table and fills `symbols`, indexed by symbol table
// index. This happens in three passes:
//  1) every symbol is visited once; undefined, weak external and resolvable
//     defined symbols are created immediately, while symbols whose section is
//     still pending are queued in `pendingIndexes`;
//  2) the queued symbols are revisited, associative section definitions are
//     processed, and symbols whose section is still pending are dropped;
//  3) the weak aliases collected in pass 1 are connected to their targets.
void ObjFile::initializeSymbols() {
  uint32_t numSymbols = coffObj->getNumberOfSymbols();
  symbols.resize(numSymbols);

  // (weak symbol, symbol table index of its alias target) pairs.
  SmallVector<std::pair<Symbol *, uint32_t>, 8> weakAliases;
  std::vector<uint32_t> pendingIndexes;
  pendingIndexes.reserve(numSymbols);

  // Filled and used only in MinGW mode.
  DenseMap<StringRef, uint32_t> prevailingSectionMap;
  // One slot per section number; see createDefined() for how it is used.
  std::vector<const coff_aux_section_definition *> comdatDefs(
      coffObj->getNumberOfSections() + 1);

  for (uint32_t i = 0; i < numSymbols; ++i) {
    COFFSymbolRef coffSym = check(coffObj->getSymbol(i));
    // Written by createDefined() before it is read below.
    bool prevailingComdat;
    if (coffSym.isUndefined()) {
      symbols[i] = createUndefined(coffSym);
    } else if (coffSym.isWeakExternal()) {
      symbols[i] = createUndefined(coffSym);
      uint32_t tagIndex = coffSym.getAux<coff_aux_weak_external>()->TagIndex;
      weakAliases.emplace_back(symbols[i], tagIndex);
    } else if (Optional<Symbol *> optSym =
                   createDefined(coffSym, comdatDefs, prevailingComdat)) {
      symbols[i] = *optSym;
      if (config->mingw && prevailingComdat)
        recordPrevailingSymbolForMingw(coffSym, prevailingSectionMap);
    } else {
      // createDefined() returns None if a symbol belongs to a section that
      // was pending at the point when the symbol was read. This can happen in
      // two cases:
      // 1) section definition symbol for a comdat leader;
      // 2) symbol belongs to a comdat section associated with another section.
      // In both of these cases, we can expect the section to be resolved by
      // the time we finish visiting the remaining symbols in the symbol
      // table. So we postpone the handling of this symbol until that time.
      pendingIndexes.push_back(i);
    }
    // Auxiliary records follow their symbol in the table; skip over them.
    i += coffSym.getNumberOfAuxSymbols();
  }

  for (uint32_t i : pendingIndexes) {
    COFFSymbolRef sym = check(coffObj->getSymbol(i));
    if (const coff_aux_section_definition *def = sym.getSectionDefinition()) {
      if (def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
        readAssociativeDefinition(sym, def);
      else if (config->mingw)
        maybeAssociateSEHForMingw(sym, def, prevailingSectionMap);
    }
    // Still unresolved after the steps above: the symbol is dropped and its
    // slot in `symbols` is left as it was.
    if (sparseChunks[sym.getSectionNumber()] == pendingComdat) {
      StringRef name;
      coffObj->getSymbolName(sym, name);
      log("comdat section " + name +
          " without leader and unassociated, discarding");
      continue;
    }
    symbols[i] = createRegular(sym);
  }

  // All symbols exist now, so alias targets can be looked up by index.
  for (auto &kv : weakAliases) {
    Symbol *sym = kv.first;
    uint32_t idx = kv.second;
    checkAndSetWeakAlias(symtab, this, sym, symbols[idx]);
  }
}
465
466Symbol *ObjFile::createUndefined(COFFSymbolRef sym) {
467  StringRef name;
468  coffObj->getSymbolName(sym, name);
469  return symtab->addUndefined(name, this, sym.isWeakExternal());
470}
471
// Decides what to do with the comdat symbol `sym` from this file when a comdat
// with the same name, `leader`, already exists.
//
// `selection` is the selection type of the new comdat; it may be rewritten
// when it is merged with the leader's selection type. `prevailing` is left
// untouched if it is already true; otherwise it is set to true only when the
// new comdat replaces the leader (the "largest" case). Conflicts are reported
// through symtab->reportDuplicate().
void ObjFile::handleComdatSelection(COFFSymbolRef sym, COMDATType &selection,
                                    bool &prevailing, DefinedRegular *leader) {
  if (prevailing)
    return;
  // There's already an existing comdat for this symbol: `Leader`.
  // Use the comdat's selection field to determine if the new
  // symbol in `Sym` should be discarded, produce a duplicate symbol
  // error, etc.

  SectionChunk *leaderChunk = nullptr;
  COMDATType leaderSelection = IMAGE_COMDAT_SELECT_ANY;

  if (leader->data) {
    leaderChunk = leader->getChunk();
    leaderSelection = leaderChunk->selection;
  } else {
    // FIXME: comdats from LTO files don't know their selection; treat them
    // as "any".
    selection = leaderSelection;
  }

  if ((selection == IMAGE_COMDAT_SELECT_ANY &&
       leaderSelection == IMAGE_COMDAT_SELECT_LARGEST) ||
      (selection == IMAGE_COMDAT_SELECT_LARGEST &&
       leaderSelection == IMAGE_COMDAT_SELECT_ANY)) {
    // cl.exe picks "any" for vftables when building with /GR- and
    // "largest" when building with /GR. To be able to link object files
    // compiled with each flag, "any" and "largest" are merged as "largest".
    leaderSelection = selection = IMAGE_COMDAT_SELECT_LARGEST;
  }

  // GCC's __declspec(selectany) doesn't actually pick "any" but "same size
  // as". Clang on the other hand picks "any". To be able to link two object
  // files with a __declspec(selectany) declaration, one compiled with gcc and
  // the other with clang, we merge them as proper "same size as".
  if (config->mingw && ((selection == IMAGE_COMDAT_SELECT_ANY &&
                         leaderSelection == IMAGE_COMDAT_SELECT_SAME_SIZE) ||
                        (selection == IMAGE_COMDAT_SELECT_SAME_SIZE &&
                         leaderSelection == IMAGE_COMDAT_SELECT_ANY))) {
    leaderSelection = selection = IMAGE_COMDAT_SELECT_SAME_SIZE;
  }

  // Other than that, comdat selections must match.  This is a bit more
  // strict than link.exe which allows merging "any" and "largest" if "any"
  // is the first symbol the linker sees, and it allows merging "largest"
  // with everything (!) if "largest" is the first symbol the linker sees.
  // Making this symmetric independent of which selection is seen first
  // seems better though.
  // (This behavior matches ModuleLinker::getComdatResult().)
  if (selection != leaderSelection) {
    log(("conflicting comdat type for " + toString(*leader) + ": " +
         Twine((int)leaderSelection) + " in " + toString(leader->getFile()) +
         " and " + Twine((int)selection) + " in " + toString(this))
            .str());
    symtab->reportDuplicate(leader, this);
    return;
  }

  // From here on both comdats share the same (possibly merged) selection.
  switch (selection) {
  case IMAGE_COMDAT_SELECT_NODUPLICATES:
    symtab->reportDuplicate(leader, this);
    break;

  case IMAGE_COMDAT_SELECT_ANY:
    // Nothing to do.
    break;

  case IMAGE_COMDAT_SELECT_SAME_SIZE:
    if (leaderChunk->getSize() != getSection(sym)->SizeOfRawData)
      symtab->reportDuplicate(leader, this);
    break;

  case IMAGE_COMDAT_SELECT_EXACT_MATCH: {
    // Temporary chunk, used only to compare contents with the leader's.
    SectionChunk newChunk(this, getSection(sym));
    // link.exe only compares section contents here and doesn't complain
    // if the two comdat sections have e.g. different alignment.
    // Match that.
    if (leaderChunk->getContents() != newChunk.getContents())
      symtab->reportDuplicate(leader, this, &newChunk, sym.getValue());
    break;
  }

  case IMAGE_COMDAT_SELECT_ASSOCIATIVE:
    // createDefined() is never called for IMAGE_COMDAT_SELECT_ASSOCIATIVE.
    // (This means lld-link doesn't produce duplicate symbol errors for
    // associative comdats while link.exe does, but associative comdats
    // are never extern in practice.)
    llvm_unreachable("createDefined not called for associative comdats");

  case IMAGE_COMDAT_SELECT_LARGEST:
    if (leaderChunk->getSize() < getSection(sym)->SizeOfRawData) {
      // Replace the existing comdat symbol with the new one.
      StringRef name;
      coffObj->getSymbolName(sym, name);
      // FIXME: This is incorrect: With /opt:noref, the previous sections
      // make it into the final executable as well. Correct handling would
      // be to undo reading of the whole old section that's being replaced,
      // or doing one pass that determines what the final largest comdat
      // is for all IMAGE_COMDAT_SELECT_LARGEST comdats and then reading
      // only the largest one.
      replaceSymbol<DefinedRegular>(leader, this, name, /*IsCOMDAT*/ true,
                                    /*IsExternal*/ true, sym.getGeneric(),
                                    nullptr);
      prevailing = true;
    }
    break;

  case IMAGE_COMDAT_SELECT_NEWEST:
    llvm_unreachable("should have been rejected earlier");
  }
}
583
584Optional<Symbol *> ObjFile::createDefined(
585    COFFSymbolRef sym,
586    std::vector<const coff_aux_section_definition *> &comdatDefs,
587    bool &prevailing) {
588  prevailing = false;
589  auto getName = [&]() {
590    StringRef s;
591    coffObj->getSymbolName(sym, s);
592    return s;
593  };
594
595  if (sym.isCommon()) {
596    auto *c = make<CommonChunk>(sym);
597    chunks.push_back(c);
598    return symtab->addCommon(this, getName(), sym.getValue(), sym.getGeneric(),
599                             c);
600  }
601
602  if (sym.isAbsolute()) {
603    StringRef name = getName();
604
605    if (name == "@feat.00")
606      feat00Flags = sym.getValue();
607    // Skip special symbols.
608    if (ignoredSymbolName(name))
609      return nullptr;
610
611    if (sym.isExternal())
612      return symtab->addAbsolute(name, sym);
613    return make<DefinedAbsolute>(name, sym);
614  }
615
616  int32_t sectionNumber = sym.getSectionNumber();
617  if (sectionNumber == llvm::COFF::IMAGE_SYM_DEBUG)
618    return nullptr;
619
620  if (llvm::COFF::isReservedSectionNumber(sectionNumber))
621    fatal(toString(this) + ": " + getName() +
622          " should not refer to special section " + Twine(sectionNumber));
623
624  if ((uint32_t)sectionNumber >= sparseChunks.size())
625    fatal(toString(this) + ": " + getName() +
626          " should not refer to non-existent section " + Twine(sectionNumber));
627
628  // Comdat handling.
629  // A comdat symbol consists of two symbol table entries.
630  // The first symbol entry has the name of the section (e.g. .text), fixed
631  // values for the other fields, and one auxiliary record.
632  // The second symbol entry has the name of the comdat symbol, called the
633  // "comdat leader".
634  // When this function is called for the first symbol entry of a comdat,
635  // it sets comdatDefs and returns None, and when it's called for the second
636  // symbol entry it reads comdatDefs and then sets it back to nullptr.
637
638  // Handle comdat leader.
639  if (const coff_aux_section_definition *def = comdatDefs[sectionNumber]) {
640    comdatDefs[sectionNumber] = nullptr;
641    DefinedRegular *leader;
642
643    if (sym.isExternal()) {
644      std::tie(leader, prevailing) =
645          symtab->addComdat(this, getName(), sym.getGeneric());
646    } else {
647      leader = make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false,
648                                    /*IsExternal*/ false, sym.getGeneric());
649      prevailing = true;
650    }
651
652    if (def->Selection < (int)IMAGE_COMDAT_SELECT_NODUPLICATES ||
653        // Intentionally ends at IMAGE_COMDAT_SELECT_LARGEST: link.exe
654        // doesn't understand IMAGE_COMDAT_SELECT_NEWEST either.
655        def->Selection > (int)IMAGE_COMDAT_SELECT_LARGEST) {
656      fatal("unknown comdat type " + std::to_string((int)def->Selection) +
657            " for " + getName() + " in " + toString(this));
658    }
659    COMDATType selection = (COMDATType)def->Selection;
660
661    if (leader->isCOMDAT)
662      handleComdatSelection(sym, selection, prevailing, leader);
663
664    if (prevailing) {
665      SectionChunk *c = readSection(sectionNumber, def, getName());
666      sparseChunks[sectionNumber] = c;
667      c->sym = cast<DefinedRegular>(leader);
668      c->selection = selection;
669      cast<DefinedRegular>(leader)->data = &c->repl;
670    } else {
671      sparseChunks[sectionNumber] = nullptr;
672    }
673    return leader;
674  }
675
676  // Prepare to handle the comdat leader symbol by setting the section's
677  // ComdatDefs pointer if we encounter a non-associative comdat.
678  if (sparseChunks[sectionNumber] == pendingComdat) {
679    if (const coff_aux_section_definition *def = sym.getSectionDefinition()) {
680      if (def->Selection != IMAGE_COMDAT_SELECT_ASSOCIATIVE)
681        comdatDefs[sectionNumber] = def;
682    }
683    return None;
684  }
685
686  return createRegular(sym);
687}
688
689MachineTypes ObjFile::getMachineType() {
690  if (coffObj)
691    return static_cast<MachineTypes>(coffObj->getMachine());
692  return IMAGE_FILE_MACHINE_UNKNOWN;
693}
694
695ArrayRef<uint8_t> ObjFile::getDebugSection(StringRef secName) {
696  if (SectionChunk *sec = SectionChunk::findByName(debugChunks, secName))
697    return sec->consumeDebugMagic();
698  return {};
699}
700
701// OBJ files systematically store critical information in a .debug$S stream,
702// even if the TU was compiled with no debug info. At least two records are
703// always there. S_OBJNAME stores a 32-bit signature, which is loaded into the
704// PCHSignature member. S_COMPILE3 stores compile-time cmd-line flags. This is
705// currently used to initialize the hotPatchable member.
706void ObjFile::initializeFlags() {
707  ArrayRef<uint8_t> data = getDebugSection(".debug$S");
708  if (data.empty())
709    return;
710
711  DebugSubsectionArray subsections;
712
713  BinaryStreamReader reader(data, support::little);
714  ExitOnError exitOnErr;
715  exitOnErr(reader.readArray(subsections, data.size()));
716
717  for (const DebugSubsectionRecord &ss : subsections) {
718    if (ss.kind() != DebugSubsectionKind::Symbols)
719      continue;
720
721    unsigned offset = 0;
722
723    // Only parse the first two records. We are only looking for S_OBJNAME
724    // and S_COMPILE3, and they usually appear at the beginning of the
725    // stream.
726    for (unsigned i = 0; i < 2; ++i) {
727      Expected<CVSymbol> sym = readSymbolFromStream(ss.getRecordData(), offset);
728      if (!sym) {
729        consumeError(sym.takeError());
730        return;
731      }
732      if (sym->kind() == SymbolKind::S_COMPILE3) {
733        auto cs =
734            cantFail(SymbolDeserializer::deserializeAs<Compile3Sym>(sym.get()));
735        hotPatchable =
736            (cs.Flags & CompileSym3Flags::HotPatch) != CompileSym3Flags::None;
737      }
738      if (sym->kind() == SymbolKind::S_OBJNAME) {
739        auto objName = cantFail(SymbolDeserializer::deserializeAs<ObjNameSym>(
740            sym.get()));
741        pchSignature = objName.Signature;
742      }
743      offset += sym->length();
744    }
745  }
746}
747
748// Depending on the compilation flags, OBJs can refer to external files,
749// necessary to merge this OBJ into the final PDB. We currently support two
750// types of external files: Precomp/PCH OBJs, when compiling with /Yc and /Yu.
751// And PDB type servers, when compiling with /Zi. This function extracts these
752// dependencies and makes them available as a TpiSource interface (see
753// DebugTypes.h). Both cases only happen with cl.exe: clang-cl produces regular
754// output even with /Yc and /Yu and with /Zi.
755void ObjFile::initializeDependencies() {
756  if (!config->debug)
757    return;
758
759  bool isPCH = false;
760
761  ArrayRef<uint8_t> data = getDebugSection(".debug$P");
762  if (!data.empty())
763    isPCH = true;
764  else
765    data = getDebugSection(".debug$T");
766
767  if (data.empty())
768    return;
769
770  CVTypeArray types;
771  BinaryStreamReader reader(data, support::little);
772  cantFail(reader.readArray(types, reader.getLength()));
773
774  CVTypeArray::Iterator firstType = types.begin();
775  if (firstType == types.end())
776    return;
777
778  // Remember the .debug$T or .debug$P section.
779  debugTypes = data;
780
781  if (isPCH) {
782    debugTypesObj = makePrecompSource(this);
783    return;
784  }
785
786  if (firstType->kind() == LF_TYPESERVER2) {
787    TypeServer2Record ts = cantFail(
788        TypeDeserializer::deserializeAs<TypeServer2Record>(firstType->data()));
789    debugTypesObj = makeUseTypeServerSource(this, &ts);
790    return;
791  }
792
793  if (firstType->kind() == LF_PRECOMP) {
794    PrecompRecord precomp = cantFail(
795        TypeDeserializer::deserializeAs<PrecompRecord>(firstType->data()));
796    debugTypesObj = makeUsePrecompSource(this, &precomp);
797    return;
798  }
799
800  debugTypesObj = makeTpiSource(this);
801}
802
803// Used only for DWARF debug info, which is not common (except in MinGW
804// environments). This returns an optional pair of file name and line
805// number for where the variable was defined.
806Optional<std::pair<StringRef, uint32_t>>
807ObjFile::getVariableLocation(StringRef var) {
808  if (!dwarf) {
809    dwarf = make<DWARFCache>(DWARFContext::create(*getCOFFObj()));
810    if (!dwarf)
811      return None;
812  }
813  if (config->machine == I386)
814    var.consume_front("_");
815  Optional<std::pair<std::string, unsigned>> ret = dwarf->getVariableLoc(var);
816  if (!ret)
817    return None;
818  return std::make_pair(saver.save(ret->first), ret->second);
819}
820
821// Used only for DWARF debug info, which is not common (except in MinGW
822// environments).
823Optional<DILineInfo> ObjFile::getDILineInfo(uint32_t offset,
824                                            uint32_t sectionIndex) {
825  if (!dwarf) {
826    dwarf = make<DWARFCache>(DWARFContext::create(*getCOFFObj()));
827    if (!dwarf)
828      return None;
829  }
830
831  return dwarf->getDILineInfo(offset, sectionIndex);
832}
833
834StringRef ltrim1(StringRef s, const char *chars) {
835  if (!s.empty() && strchr(chars, s[0]))
836    return s.substr(1);
837  return s;
838}
839
840void ImportFile::parse() {
841  const char *buf = mb.getBufferStart();
842  const auto *hdr = reinterpret_cast<const coff_import_header *>(buf);
843
844  // Check if the total size is valid.
845  if (mb.getBufferSize() != sizeof(*hdr) + hdr->SizeOfData)
846    fatal("broken import library");
847
848  // Read names and create an __imp_ symbol.
849  StringRef name = saver.save(StringRef(buf + sizeof(*hdr)));
850  StringRef impName = saver.save("__imp_" + name);
851  const char *nameStart = buf + sizeof(coff_import_header) + name.size() + 1;
852  dllName = StringRef(nameStart);
853  StringRef extName;
854  switch (hdr->getNameType()) {
855  case IMPORT_ORDINAL:
856    extName = "";
857    break;
858  case IMPORT_NAME:
859    extName = name;
860    break;
861  case IMPORT_NAME_NOPREFIX:
862    extName = ltrim1(name, "?@_");
863    break;
864  case IMPORT_NAME_UNDECORATE:
865    extName = ltrim1(name, "?@_");
866    extName = extName.substr(0, extName.find('@'));
867    break;
868  }
869
870  this->hdr = hdr;
871  externalName = extName;
872
873  impSym = symtab->addImportData(impName, this);
874  // If this was a duplicate, we logged an error but may continue;
875  // in this case, impSym is nullptr.
876  if (!impSym)
877    return;
878
879  if (hdr->getType() == llvm::COFF::IMPORT_CONST)
880    static_cast<void>(symtab->addImportData(name, this));
881
882  // If type is function, we need to create a thunk which jump to an
883  // address pointed by the __imp_ symbol. (This allows you to call
884  // DLL functions just like regular non-DLL functions.)
885  if (hdr->getType() == llvm::COFF::IMPORT_CODE)
886    thunkSym = symtab->addImportThunk(
887        name, cast_or_null<DefinedImportData>(impSym), hdr->Machine);
888}
889
890BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
891                         uint64_t offsetInArchive)
892    : BitcodeFile(mb, archiveName, offsetInArchive, {}) {}
893
894BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
895                         uint64_t offsetInArchive,
896                         std::vector<Symbol *> &&symbols)
897    : InputFile(BitcodeKind, mb), symbols(std::move(symbols)) {
898  std::string path = mb.getBufferIdentifier().str();
899  if (config->thinLTOIndexOnly)
900    path = replaceThinLTOSuffix(mb.getBufferIdentifier());
901
902  // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
903  // name. If two archives define two members with the same name, this
904  // causes a collision which result in only one of the objects being taken
905  // into consideration at LTO time (which very likely causes undefined
906  // symbols later in the link stage). So we append file offset to make
907  // filename unique.
908  MemoryBufferRef mbref(
909      mb.getBuffer(),
910      saver.save(archiveName + path +
911                 (archiveName.empty() ? "" : utostr(offsetInArchive))));
912
913  obj = check(lto::InputFile::create(mbref));
914}
915
// Destructor, explicitly defaulted here in the implementation file.
// NOTE(review): presumably it is defined out of line so that the header does
// not need the complete types of the members being destroyed (e.g. `obj`) --
// confirm against InputFiles.h.
BitcodeFile::~BitcodeFile() = default;
917
918void BitcodeFile::parse() {
919  std::vector<std::pair<Symbol *, bool>> comdat(obj->getComdatTable().size());
920  for (size_t i = 0; i != obj->getComdatTable().size(); ++i)
921    // FIXME: lto::InputFile doesn't keep enough data to do correct comdat
922    // selection handling.
923    comdat[i] = symtab->addComdat(this, saver.save(obj->getComdatTable()[i]));
924  for (const lto::InputFile::Symbol &objSym : obj->symbols()) {
925    StringRef symName = saver.save(objSym.getName());
926    int comdatIndex = objSym.getComdatIndex();
927    Symbol *sym;
928    if (objSym.isUndefined()) {
929      sym = symtab->addUndefined(symName, this, false);
930    } else if (objSym.isCommon()) {
931      sym = symtab->addCommon(this, symName, objSym.getCommonSize());
932    } else if (objSym.isWeak() && objSym.isIndirect()) {
933      // Weak external.
934      sym = symtab->addUndefined(symName, this, true);
935      std::string fallback = objSym.getCOFFWeakExternalFallback();
936      Symbol *alias = symtab->addUndefined(saver.save(fallback));
937      checkAndSetWeakAlias(symtab, this, sym, alias);
938    } else if (comdatIndex != -1) {
939      if (symName == obj->getComdatTable()[comdatIndex])
940        sym = comdat[comdatIndex].first;
941      else if (comdat[comdatIndex].second)
942        sym = symtab->addRegular(this, symName);
943      else
944        sym = symtab->addUndefined(symName, this, false);
945    } else {
946      sym = symtab->addRegular(this, symName);
947    }
948    symbols.push_back(sym);
949    if (objSym.isUsed())
950      config->gcroot.push_back(sym);
951  }
952  directives = obj->getCOFFLinkerOpts();
953}
954
955MachineTypes BitcodeFile::getMachineType() {
956  switch (Triple(obj->getTargetTriple()).getArch()) {
957  case Triple::x86_64:
958    return AMD64;
959  case Triple::x86:
960    return I386;
961  case Triple::arm:
962    return ARMNT;
963  case Triple::aarch64:
964    return ARM64;
965  default:
966    return IMAGE_FILE_MACHINE_UNKNOWN;
967  }
968}
969
970std::string replaceThinLTOSuffix(StringRef path) {
971  StringRef suffix = config->thinLTOObjectSuffixReplace.first;
972  StringRef repl = config->thinLTOObjectSuffixReplace.second;
973
974  if (path.consume_back(suffix))
975    return (path + repl).str();
976  return path;
977}
978
979} // namespace coff
980} // namespace lld
981