InputFiles.cpp revision 321369
1//===- InputFiles.cpp -----------------------------------------------------===//
2//
3//                             The LLVM Linker
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "InputFiles.h"
11#include "Chunks.h"
12#include "Config.h"
13#include "Driver.h"
14#include "Error.h"
15#include "Memory.h"
16#include "SymbolTable.h"
17#include "Symbols.h"
18#include "llvm-c/lto.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/ADT/Triple.h"
21#include "llvm/ADT/Twine.h"
22#include "llvm/BinaryFormat/COFF.h"
23#include "llvm/Object/Binary.h"
24#include "llvm/Object/COFF.h"
25#include "llvm/Support/Casting.h"
26#include "llvm/Support/Endian.h"
27#include "llvm/Support/Error.h"
28#include "llvm/Support/ErrorOr.h"
29#include "llvm/Support/FileSystem.h"
30#include "llvm/Target/TargetOptions.h"
31#include <cstring>
32#include <system_error>
33#include <utility>
34
35using namespace llvm;
36using namespace llvm::COFF;
37using namespace llvm::object;
38using namespace llvm::support::endian;
39
40using llvm::Triple;
41using llvm::support::ulittle32_t;
42
43namespace lld {
44namespace coff {
45
46/// Checks that Source is compatible with being a weak alias to Target.
47/// If Source is Undefined and has no weak alias set, makes it a weak
48/// alias to Target.
49static void checkAndSetWeakAlias(SymbolTable *Symtab, InputFile *F,
50                                 SymbolBody *Source, SymbolBody *Target) {
51  if (auto *U = dyn_cast<Undefined>(Source)) {
52    if (U->WeakAlias && U->WeakAlias != Target)
53      Symtab->reportDuplicate(Source->symbol(), F);
54    U->WeakAlias = Target;
55  }
56}
57
58ArchiveFile::ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {}
59
60void ArchiveFile::parse() {
61  // Parse a MemoryBufferRef as an archive file.
62  File = check(Archive::create(MB), toString(this));
63
64  // Read the symbol table to construct Lazy objects.
65  for (const Archive::Symbol &Sym : File->symbols())
66    Symtab->addLazy(this, Sym);
67}
68
69// Returns a buffer pointing to a member file containing a given symbol.
70void ArchiveFile::addMember(const Archive::Symbol *Sym) {
71  const Archive::Child &C =
72      check(Sym->getMember(),
73            "could not get the member for symbol " + Sym->getName());
74
75  // Return an empty buffer if we have already returned the same buffer.
76  if (!Seen.insert(C.getChildOffset()).second)
77    return;
78
79  Driver->enqueueArchiveMember(C, Sym->getName(), getName());
80}
81
82void ObjectFile::parse() {
83  // Parse a memory buffer as a COFF file.
84  std::unique_ptr<Binary> Bin = check(createBinary(MB), toString(this));
85
86  if (auto *Obj = dyn_cast<COFFObjectFile>(Bin.get())) {
87    Bin.release();
88    COFFObj.reset(Obj);
89  } else {
90    fatal(toString(this) + " is not a COFF file");
91  }
92
93  // Read section and symbol tables.
94  initializeChunks();
95  initializeSymbols();
96  initializeSEH();
97}
98
99void ObjectFile::initializeChunks() {
100  uint32_t NumSections = COFFObj->getNumberOfSections();
101  Chunks.reserve(NumSections);
102  SparseChunks.resize(NumSections + 1);
103  for (uint32_t I = 1; I < NumSections + 1; ++I) {
104    const coff_section *Sec;
105    StringRef Name;
106    if (auto EC = COFFObj->getSection(I, Sec))
107      fatal(EC, "getSection failed: #" + Twine(I));
108    if (auto EC = COFFObj->getSectionName(Sec, Name))
109      fatal(EC, "getSectionName failed: #" + Twine(I));
110    if (Name == ".sxdata") {
111      SXData = Sec;
112      continue;
113    }
114    if (Name == ".drectve") {
115      ArrayRef<uint8_t> Data;
116      COFFObj->getSectionContents(Sec, Data);
117      Directives = std::string((const char *)Data.data(), Data.size());
118      continue;
119    }
120
121    // Object files may have DWARF debug info or MS CodeView debug info
122    // (or both).
123    //
124    // DWARF sections don't need any special handling from the perspective
125    // of the linker; they are just a data section containing relocations.
126    // We can just link them to complete debug info.
127    //
128    // CodeView needs a linker support. We need to interpret and debug
129    // info, and then write it to a separate .pdb file.
130
131    // Ignore debug info unless /debug is given.
132    if (!Config->Debug && Name.startswith(".debug"))
133      continue;
134
135    if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
136      continue;
137    auto *C = make<SectionChunk>(this, Sec);
138
139    // CodeView sections are stored to a different vector because they are not
140    // linked in the regular manner.
141    if (C->isCodeView())
142      DebugChunks.push_back(C);
143    else
144      Chunks.push_back(C);
145
146    SparseChunks[I] = C;
147  }
148}
149
150void ObjectFile::initializeSymbols() {
151  uint32_t NumSymbols = COFFObj->getNumberOfSymbols();
152  SymbolBodies.reserve(NumSymbols);
153  SparseSymbolBodies.resize(NumSymbols);
154
155  SmallVector<std::pair<SymbolBody *, uint32_t>, 8> WeakAliases;
156  int32_t LastSectionNumber = 0;
157
158  for (uint32_t I = 0; I < NumSymbols; ++I) {
159    // Get a COFFSymbolRef object.
160    ErrorOr<COFFSymbolRef> SymOrErr = COFFObj->getSymbol(I);
161    if (!SymOrErr)
162      fatal(SymOrErr.getError(), "broken object file: " + toString(this));
163    COFFSymbolRef Sym = *SymOrErr;
164
165    const void *AuxP = nullptr;
166    if (Sym.getNumberOfAuxSymbols())
167      AuxP = COFFObj->getSymbol(I + 1)->getRawPtr();
168    bool IsFirst = (LastSectionNumber != Sym.getSectionNumber());
169
170    SymbolBody *Body = nullptr;
171    if (Sym.isUndefined()) {
172      Body = createUndefined(Sym);
173    } else if (Sym.isWeakExternal()) {
174      Body = createUndefined(Sym);
175      uint32_t TagIndex =
176          static_cast<const coff_aux_weak_external *>(AuxP)->TagIndex;
177      WeakAliases.emplace_back(Body, TagIndex);
178    } else {
179      Body = createDefined(Sym, AuxP, IsFirst);
180    }
181    if (Body) {
182      SymbolBodies.push_back(Body);
183      SparseSymbolBodies[I] = Body;
184    }
185    I += Sym.getNumberOfAuxSymbols();
186    LastSectionNumber = Sym.getSectionNumber();
187  }
188
189  for (auto &KV : WeakAliases) {
190    SymbolBody *Sym = KV.first;
191    uint32_t Idx = KV.second;
192    checkAndSetWeakAlias(Symtab, this, Sym, SparseSymbolBodies[Idx]);
193  }
194}
195
196SymbolBody *ObjectFile::createUndefined(COFFSymbolRef Sym) {
197  StringRef Name;
198  COFFObj->getSymbolName(Sym, Name);
199  return Symtab->addUndefined(Name, this, Sym.isWeakExternal())->body();
200}
201
202SymbolBody *ObjectFile::createDefined(COFFSymbolRef Sym, const void *AuxP,
203                                      bool IsFirst) {
204  StringRef Name;
205  if (Sym.isCommon()) {
206    auto *C = make<CommonChunk>(Sym);
207    Chunks.push_back(C);
208    COFFObj->getSymbolName(Sym, Name);
209    Symbol *S =
210        Symtab->addCommon(this, Name, Sym.getValue(), Sym.getGeneric(), C);
211    return S->body();
212  }
213  if (Sym.isAbsolute()) {
214    COFFObj->getSymbolName(Sym, Name);
215    // Skip special symbols.
216    if (Name == "@comp.id")
217      return nullptr;
218    // COFF spec 5.10.1. The .sxdata section.
219    if (Name == "@feat.00") {
220      if (Sym.getValue() & 1)
221        SEHCompat = true;
222      return nullptr;
223    }
224    if (Sym.isExternal())
225      return Symtab->addAbsolute(Name, Sym)->body();
226    else
227      return make<DefinedAbsolute>(Name, Sym);
228  }
229  int32_t SectionNumber = Sym.getSectionNumber();
230  if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG)
231    return nullptr;
232
233  // Reserved sections numbers don't have contents.
234  if (llvm::COFF::isReservedSectionNumber(SectionNumber))
235    fatal("broken object file: " + toString(this));
236
237  // This symbol references a section which is not present in the section
238  // header.
239  if ((uint32_t)SectionNumber >= SparseChunks.size())
240    fatal("broken object file: " + toString(this));
241
242  // Nothing else to do without a section chunk.
243  auto *SC = cast_or_null<SectionChunk>(SparseChunks[SectionNumber]);
244  if (!SC)
245    return nullptr;
246
247  // Handle section definitions
248  if (IsFirst && AuxP) {
249    auto *Aux = reinterpret_cast<const coff_aux_section_definition *>(AuxP);
250    if (Aux->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
251      if (auto *ParentSC = cast_or_null<SectionChunk>(
252              SparseChunks[Aux->getNumber(Sym.isBigObj())])) {
253        ParentSC->addAssociative(SC);
254        // If we already discarded the parent, discard the child.
255        if (ParentSC->isDiscarded())
256          SC->markDiscarded();
257      }
258    SC->Checksum = Aux->CheckSum;
259  }
260
261  DefinedRegular *B;
262  if (Sym.isExternal()) {
263    COFFObj->getSymbolName(Sym, Name);
264    Symbol *S =
265        Symtab->addRegular(this, Name, SC->isCOMDAT(), Sym.getGeneric(), SC);
266    B = cast<DefinedRegular>(S->body());
267  } else
268    B = make<DefinedRegular>(this, /*Name*/ "", SC->isCOMDAT(),
269                             /*IsExternal*/ false, Sym.getGeneric(), SC);
270  if (SC->isCOMDAT() && Sym.getValue() == 0 && !AuxP)
271    SC->setSymbol(B);
272
273  return B;
274}
275
276void ObjectFile::initializeSEH() {
277  if (!SEHCompat || !SXData)
278    return;
279  ArrayRef<uint8_t> A;
280  COFFObj->getSectionContents(SXData, A);
281  if (A.size() % 4 != 0)
282    fatal(".sxdata must be an array of symbol table indices");
283  auto *I = reinterpret_cast<const ulittle32_t *>(A.data());
284  auto *E = reinterpret_cast<const ulittle32_t *>(A.data() + A.size());
285  for (; I != E; ++I)
286    SEHandlers.insert(SparseSymbolBodies[*I]);
287}
288
289MachineTypes ObjectFile::getMachineType() {
290  if (COFFObj)
291    return static_cast<MachineTypes>(COFFObj->getMachine());
292  return IMAGE_FILE_MACHINE_UNKNOWN;
293}
294
295StringRef ltrim1(StringRef S, const char *Chars) {
296  if (!S.empty() && strchr(Chars, S[0]))
297    return S.substr(1);
298  return S;
299}
300
301void ImportFile::parse() {
302  const char *Buf = MB.getBufferStart();
303  const char *End = MB.getBufferEnd();
304  const auto *Hdr = reinterpret_cast<const coff_import_header *>(Buf);
305
306  // Check if the total size is valid.
307  if ((size_t)(End - Buf) != (sizeof(*Hdr) + Hdr->SizeOfData))
308    fatal("broken import library");
309
310  // Read names and create an __imp_ symbol.
311  StringRef Name = Saver.save(StringRef(Buf + sizeof(*Hdr)));
312  StringRef ImpName = Saver.save("__imp_" + Name);
313  const char *NameStart = Buf + sizeof(coff_import_header) + Name.size() + 1;
314  DLLName = StringRef(NameStart);
315  StringRef ExtName;
316  switch (Hdr->getNameType()) {
317  case IMPORT_ORDINAL:
318    ExtName = "";
319    break;
320  case IMPORT_NAME:
321    ExtName = Name;
322    break;
323  case IMPORT_NAME_NOPREFIX:
324    ExtName = ltrim1(Name, "?@_");
325    break;
326  case IMPORT_NAME_UNDECORATE:
327    ExtName = ltrim1(Name, "?@_");
328    ExtName = ExtName.substr(0, ExtName.find('@'));
329    break;
330  }
331
332  this->Hdr = Hdr;
333  ExternalName = ExtName;
334
335  ImpSym = cast<DefinedImportData>(
336      Symtab->addImportData(ImpName, this)->body());
337  if (Hdr->getType() == llvm::COFF::IMPORT_CONST)
338    ConstSym =
339        cast<DefinedImportData>(Symtab->addImportData(Name, this)->body());
340
341  // If type is function, we need to create a thunk which jump to an
342  // address pointed by the __imp_ symbol. (This allows you to call
343  // DLL functions just like regular non-DLL functions.)
344  if (Hdr->getType() != llvm::COFF::IMPORT_CODE)
345    return;
346  ThunkSym = cast<DefinedImportThunk>(
347      Symtab->addImportThunk(Name, ImpSym, Hdr->Machine)->body());
348}
349
350void BitcodeFile::parse() {
351  Obj = check(lto::InputFile::create(MemoryBufferRef(
352      MB.getBuffer(), Saver.save(ParentName + MB.getBufferIdentifier()))));
353  for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) {
354    StringRef SymName = Saver.save(ObjSym.getName());
355    Symbol *Sym;
356    if (ObjSym.isUndefined()) {
357      Sym = Symtab->addUndefined(SymName, this, false);
358    } else if (ObjSym.isCommon()) {
359      Sym = Symtab->addCommon(this, SymName, ObjSym.getCommonSize());
360    } else if (ObjSym.isWeak() && ObjSym.isIndirect()) {
361      // Weak external.
362      Sym = Symtab->addUndefined(SymName, this, true);
363      std::string Fallback = ObjSym.getCOFFWeakExternalFallback();
364      SymbolBody *Alias = Symtab->addUndefined(Saver.save(Fallback));
365      checkAndSetWeakAlias(Symtab, this, Sym->body(), Alias);
366    } else {
367      bool IsCOMDAT = ObjSym.getComdatIndex() != -1;
368      Sym = Symtab->addRegular(this, SymName, IsCOMDAT);
369    }
370    SymbolBodies.push_back(Sym->body());
371  }
372  Directives = Obj->getCOFFLinkerOpts();
373}
374
375MachineTypes BitcodeFile::getMachineType() {
376  switch (Triple(Obj->getTargetTriple()).getArch()) {
377  case Triple::x86_64:
378    return AMD64;
379  case Triple::x86:
380    return I386;
381  case Triple::arm:
382    return ARMNT;
383  case Triple::aarch64:
384    return ARM64;
385  default:
386    return IMAGE_FILE_MACHINE_UNKNOWN;
387  }
388}
389} // namespace coff
390} // namespace lld
391
392// Returns the last element of a path, which is supposed to be a filename.
393static StringRef getBasename(StringRef Path) {
394  size_t Pos = Path.find_last_of("\\/");
395  if (Pos == StringRef::npos)
396    return Path;
397  return Path.substr(Pos + 1);
398}
399
400// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)".
401std::string lld::toString(coff::InputFile *File) {
402  if (!File)
403    return "(internal)";
404  if (File->ParentName.empty())
405    return File->getName().lower();
406
407  std::string Res =
408      (getBasename(File->ParentName) + "(" + getBasename(File->getName()) + ")")
409          .str();
410  return StringRef(Res).lower();
411}
412