1//===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_COFF_INPUT_FILES_H
10#define LLD_COFF_INPUT_FILES_H
11
#include "Config.h"
#include "lld/Common/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/StringSaver.h"
#include <cassert>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
24
25namespace llvm {
26struct DILineInfo;
27namespace pdb {
28class DbiModuleDescriptorBuilder;
29class NativeSession;
30}
31namespace lto {
32class InputFile;
33}
34}
35
36namespace lld {
37class DWARFCache;
38
39namespace coff {
40
// Takes an archive and returns a vector of memory buffer references. Only the
// declaration lives in this header; judging by the name, the result holds one
// buffer per archive member -- confirm against the definition.
std::vector<MemoryBufferRef> getArchiveMembers(llvm::object::Archive *file);
42
43using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN;
44using llvm::COFF::MachineTypes;
45using llvm::object::Archive;
46using llvm::object::COFFObjectFile;
47using llvm::object::COFFSymbolRef;
48using llvm::object::coff_import_header;
49using llvm::object::coff_section;
50
51class Chunk;
52class Defined;
53class DefinedImportData;
54class DefinedImportThunk;
55class DefinedRegular;
56class SectionChunk;
57class Symbol;
58class Undefined;
59class TpiSource;
60
61// The root class of input files.
62class InputFile {
63public:
64  enum Kind {
65    ArchiveKind,
66    ObjectKind,
67    LazyObjectKind,
68    PDBKind,
69    ImportKind,
70    BitcodeKind
71  };
72  Kind kind() const { return fileKind; }
73  virtual ~InputFile() {}
74
75  // Returns the filename.
76  StringRef getName() const { return mb.getBufferIdentifier(); }
77
78  // Reads a file (the constructor doesn't do that).
79  virtual void parse() = 0;
80
81  // Returns the CPU type this file was compiled to.
82  virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; }
83
84  MemoryBufferRef mb;
85
86  // An archive file name if this file is created from an archive.
87  StringRef parentName;
88
89  // Returns .drectve section contents if exist.
90  StringRef getDirectives() { return directives; }
91
92protected:
93  InputFile(Kind k, MemoryBufferRef m) : mb(m), fileKind(k) {}
94
95  StringRef directives;
96
97private:
98  const Kind fileKind;
99};
100
101// .lib or .a file.
102class ArchiveFile : public InputFile {
103public:
104  explicit ArchiveFile(MemoryBufferRef m);
105  static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
106  void parse() override;
107
108  // Enqueues an archive member load for the given symbol. If we've already
109  // enqueued a load for the same archive member, this function does nothing,
110  // which ensures that we don't load the same member more than once.
111  void addMember(const Archive::Symbol &sym);
112
113private:
114  std::unique_ptr<Archive> file;
115  llvm::DenseSet<uint64_t> seen;
116};
117
118// .obj or .o file between -start-lib and -end-lib.
119class LazyObjFile : public InputFile {
120public:
121  explicit LazyObjFile(MemoryBufferRef m) : InputFile(LazyObjectKind, m) {}
122  static bool classof(const InputFile *f) {
123    return f->kind() == LazyObjectKind;
124  }
125  // Makes this object file part of the link.
126  void fetch();
127  // Adds the symbols in this file to the symbol table as LazyObject symbols.
128  void parse() override;
129
130private:
131  std::vector<Symbol *> symbols;
132};
133
134// .obj or .o file. This may be a member of an archive file.
135class ObjFile : public InputFile {
136public:
137  explicit ObjFile(MemoryBufferRef m) : InputFile(ObjectKind, m) {}
138  explicit ObjFile(MemoryBufferRef m, std::vector<Symbol *> &&symbols)
139      : InputFile(ObjectKind, m), symbols(std::move(symbols)) {}
140  static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
141  void parse() override;
142  MachineTypes getMachineType() override;
143  ArrayRef<Chunk *> getChunks() { return chunks; }
144  ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; }
145  ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; }
146  ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; }
147  ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; }
148  ArrayRef<Symbol *> getSymbols() { return symbols; }
149
150  ArrayRef<uint8_t> getDebugSection(StringRef secName);
151
152  // Returns a Symbol object for the symbolIndex'th symbol in the
153  // underlying object file.
154  Symbol *getSymbol(uint32_t symbolIndex) {
155    return symbols[symbolIndex];
156  }
157
158  // Returns the underlying COFF file.
159  COFFObjectFile *getCOFFObj() { return coffObj.get(); }
160
161  // Add a symbol for a range extension thunk. Return the new symbol table
162  // index. This index can be used to modify a relocation.
163  uint32_t addRangeThunkSymbol(Symbol *thunk) {
164    symbols.push_back(thunk);
165    return symbols.size() - 1;
166  }
167
168  void includeResourceChunks();
169
170  bool isResourceObjFile() const { return !resourceChunks.empty(); }
171
172  static std::vector<ObjFile *> instances;
173
174  // Flags in the absolute @feat.00 symbol if it is present. These usually
175  // indicate if an object was compiled with certain security features enabled
176  // like stack guard, safeseh, /guard:cf, or other things.
177  uint32_t feat00Flags = 0;
178
179  // True if this object file is compatible with SEH.  COFF-specific and
180  // x86-only. COFF spec 5.10.1. The .sxdata section.
181  bool hasSafeSEH() { return feat00Flags & 0x1; }
182
183  // True if this file was compiled with /guard:cf.
184  bool hasGuardCF() { return feat00Flags & 0x800; }
185
186  // Pointer to the PDB module descriptor builder. Various debug info records
187  // will reference object files by "module index", which is here. Things like
188  // source files and section contributions are also recorded here. Will be null
189  // if we are not producing a PDB.
190  llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr;
191
192  const coff_section *addrsigSec = nullptr;
193
194  // When using Microsoft precompiled headers, this is the PCH's key.
195  // The same key is used by both the precompiled object, and objects using the
196  // precompiled object. Any difference indicates out-of-date objects.
197  llvm::Optional<uint32_t> pchSignature;
198
199  // Whether this file was compiled with /hotpatch.
200  bool hotPatchable = false;
201
202  // Whether the object was already merged into the final PDB.
203  bool mergedIntoPDB = false;
204
205  // If the OBJ has a .debug$T stream, this tells how it will be handled.
206  TpiSource *debugTypesObj = nullptr;
207
208  // The .debug$P or .debug$T section data if present. Empty otherwise.
209  ArrayRef<uint8_t> debugTypes;
210
211  llvm::Optional<std::pair<StringRef, uint32_t>>
212  getVariableLocation(StringRef var);
213
214  llvm::Optional<llvm::DILineInfo> getDILineInfo(uint32_t offset,
215                                                 uint32_t sectionIndex);
216
217private:
218  const coff_section* getSection(uint32_t i);
219  const coff_section *getSection(COFFSymbolRef sym) {
220    return getSection(sym.getSectionNumber());
221  }
222
223  void initializeChunks();
224  void initializeSymbols();
225  void initializeFlags();
226  void initializeDependencies();
227
228  SectionChunk *
229  readSection(uint32_t sectionNumber,
230              const llvm::object::coff_aux_section_definition *def,
231              StringRef leaderName);
232
233  void readAssociativeDefinition(
234      COFFSymbolRef coffSym,
235      const llvm::object::coff_aux_section_definition *def);
236
237  void readAssociativeDefinition(
238      COFFSymbolRef coffSym,
239      const llvm::object::coff_aux_section_definition *def,
240      uint32_t parentSection);
241
242  void recordPrevailingSymbolForMingw(
243      COFFSymbolRef coffSym,
244      llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
245
246  void maybeAssociateSEHForMingw(
247      COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def,
248      const llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
249
250  // Given a new symbol Sym with comdat selection Selection, if the new
251  // symbol is not (yet) Prevailing and the existing comdat leader set to
252  // Leader, emits a diagnostic if the new symbol and its selection doesn't
253  // match the existing symbol and its selection. If either old or new
254  // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace
255  // the existing leader. In that case, Prevailing is set to true.
256  void handleComdatSelection(COFFSymbolRef sym,
257                             llvm::COFF::COMDATType &selection,
258                             bool &prevailing, DefinedRegular *leader);
259
260  llvm::Optional<Symbol *>
261  createDefined(COFFSymbolRef sym,
262                std::vector<const llvm::object::coff_aux_section_definition *>
263                    &comdatDefs,
264                bool &prevailingComdat);
265  Symbol *createRegular(COFFSymbolRef sym);
266  Symbol *createUndefined(COFFSymbolRef sym);
267
268  std::unique_ptr<COFFObjectFile> coffObj;
269
270  // List of all chunks defined by this file. This includes both section
271  // chunks and non-section chunks for common symbols.
272  std::vector<Chunk *> chunks;
273
274  std::vector<SectionChunk *> resourceChunks;
275
276  // CodeView debug info sections.
277  std::vector<SectionChunk *> debugChunks;
278
279  // Chunks containing symbol table indices of exception handlers. Only used for
280  // 32-bit x86.
281  std::vector<SectionChunk *> sxDataChunks;
282
283  // Chunks containing symbol table indices of address taken symbols and longjmp
284  // targets.  These are not linked into the final binary when /guard:cf is set.
285  std::vector<SectionChunk *> guardFidChunks;
286  std::vector<SectionChunk *> guardLJmpChunks;
287
288  // This vector contains a list of all symbols defined or referenced by this
289  // file. They are indexed such that you can get a Symbol by symbol
290  // index. Nonexistent indices (which are occupied by auxiliary
291  // symbols in the real symbol table) are filled with null pointers.
292  std::vector<Symbol *> symbols;
293
294  // This vector contains the same chunks as Chunks, but they are
295  // indexed such that you can get a SectionChunk by section index.
296  // Nonexistent section indices are filled with null pointers.
297  // (Because section number is 1-based, the first slot is always a
298  // null pointer.) This vector is only valid during initialization.
299  std::vector<SectionChunk *> sparseChunks;
300
301  DWARFCache *dwarf = nullptr;
302};
303
304// This is a PDB type server dependency, that is not a input file per se, but
305// needs to be treated like one. Such files are discovered from the debug type
306// stream.
307class PDBInputFile : public InputFile {
308public:
309  explicit PDBInputFile(MemoryBufferRef m);
310  ~PDBInputFile();
311  static bool classof(const InputFile *f) { return f->kind() == PDBKind; }
312  void parse() override;
313
314  static void enqueue(StringRef path, ObjFile *fromFile);
315
316  static PDBInputFile *findFromRecordPath(StringRef path, ObjFile *fromFile);
317
318  static std::map<std::string, PDBInputFile *> instances;
319
320  // Record possible errors while opening the PDB file
321  llvm::Optional<Error> loadErr;
322
323  // This is the actual interface to the PDB (if it was opened successfully)
324  std::unique_ptr<llvm::pdb::NativeSession> session;
325
326  // If the PDB has a .debug$T stream, this tells how it will be handled.
327  TpiSource *debugTypesObj = nullptr;
328};
329
330// This type represents import library members that contain DLL names
331// and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7
332// for details about the format.
333class ImportFile : public InputFile {
334public:
335  explicit ImportFile(MemoryBufferRef m) : InputFile(ImportKind, m) {}
336
337  static bool classof(const InputFile *f) { return f->kind() == ImportKind; }
338
339  static std::vector<ImportFile *> instances;
340
341  Symbol *impSym = nullptr;
342  Symbol *thunkSym = nullptr;
343  std::string dllName;
344
345private:
346  void parse() override;
347
348public:
349  StringRef externalName;
350  const coff_import_header *hdr;
351  Chunk *location = nullptr;
352
353  // We want to eliminate dllimported symbols if no one actually refers them.
354  // These "Live" bits are used to keep track of which import library members
355  // are actually in use.
356  //
357  // If the Live bit is turned off by MarkLive, Writer will ignore dllimported
358  // symbols provided by this import library member. We also track whether the
359  // imported symbol is used separately from whether the thunk is used in order
360  // to avoid creating unnecessary thunks.
361  bool live = !config->doGC;
362  bool thunkLive = !config->doGC;
363};
364
365// Used for LTO.
366class BitcodeFile : public InputFile {
367public:
368  BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
369              uint64_t offsetInArchive);
370  explicit BitcodeFile(MemoryBufferRef m, StringRef archiveName,
371                       uint64_t offsetInArchive,
372                       std::vector<Symbol *> &&symbols);
373  ~BitcodeFile();
374  static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
375  ArrayRef<Symbol *> getSymbols() { return symbols; }
376  MachineTypes getMachineType() override;
377  static std::vector<BitcodeFile *> instances;
378  std::unique_ptr<llvm::lto::InputFile> obj;
379
380private:
381  void parse() override;
382
383  std::vector<Symbol *> symbols;
384};
385
386inline bool isBitcode(MemoryBufferRef mb) {
387  return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
388}
389
// Takes a path and returns a new string. Only the declaration lives in this
// header; judging by the name, the result is `path` with a ThinLTO-related
// suffix substituted -- confirm against the definition.
std::string replaceThinLTOSuffix(StringRef path);
391} // namespace coff
392
// Returns a string for the given input file. Only the declaration lives in
// this header; presumably the result is a printable name used in diagnostics
// -- confirm against the definition.
std::string toString(const coff::InputFile *file);
394} // namespace lld
395
396#endif
397