1//===- PDB.cpp ------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "PDB.h"
10#include "Chunks.h"
11#include "Config.h"
12#include "DebugTypes.h"
13#include "Driver.h"
14#include "SymbolTable.h"
15#include "Symbols.h"
16#include "TypeMerger.h"
17#include "Writer.h"
18#include "lld/Common/ErrorHandler.h"
19#include "lld/Common/Timer.h"
20#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
21#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
22#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
23#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
24#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
25#include "llvm/DebugInfo/CodeView/RecordName.h"
26#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
27#include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h"
28#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
29#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
30#include "llvm/DebugInfo/MSF/MSFBuilder.h"
31#include "llvm/DebugInfo/MSF/MSFCommon.h"
32#include "llvm/DebugInfo/PDB/GenericError.h"
33#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h"
34#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
35#include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h"
36#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
37#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
38#include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h"
39#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
40#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
41#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"
42#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
43#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
44#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
45#include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h"
46#include "llvm/DebugInfo/PDB/PDB.h"
47#include "llvm/Object/COFF.h"
48#include "llvm/Object/CVDebugRecord.h"
49#include "llvm/Support/BinaryByteStream.h"
50#include "llvm/Support/CRC.h"
51#include "llvm/Support/Endian.h"
52#include "llvm/Support/Errc.h"
53#include "llvm/Support/FormatAdapters.h"
54#include "llvm/Support/FormatVariadic.h"
55#include "llvm/Support/Path.h"
56#include "llvm/Support/ScopedPrinter.h"
57#include <memory>
58
59using namespace llvm;
60using namespace llvm::codeview;
61using namespace lld;
62using namespace lld::coff;
63
64using llvm::object::coff_section;
65
// Helper used throughout this file to unwrap llvm::Error results (see the
// exitOnErr(...) call sites below).
static ExitOnError exitOnErr;

// Timers for PDB emission. Each timer is constructed with a display name and
// a parent timer, so the declarations below form a tree rooted at
// Timer::root(): the cumulative PDB timer is the parent of the per-phase
// timers, and "Add Objects" is in turn the parent of the type- and
// symbol-merging timers.
static Timer totalPdbLinkTimer("PDB Emission (Cumulative)", Timer::root());

static Timer addObjectsTimer("Add Objects", totalPdbLinkTimer);
static Timer typeMergingTimer("Type Merging", addObjectsTimer);
static Timer symbolMergingTimer("Symbol Merging", addObjectsTimer);
static Timer publicsLayoutTimer("Publics Stream Layout", totalPdbLinkTimer);
static Timer tpiStreamLayoutTimer("TPI Stream Layout", totalPdbLinkTimer);
static Timer diskCommitTimer("Commit to Disk", totalPdbLinkTimer);
76
77namespace {
78class DebugSHandler;
79
/// Drives the creation of the output PDB. It owns the PDB file builder, the
/// type merger and the PDB-wide string table, and keeps a few counters that
/// are used for statistics.
class PDBLinker {
  // DebugSHandler reaches into the private builder and string table.
  friend DebugSHandler;

public:
  /// Construct a linker over \p symtab. The file builder and the type merger
  /// are both constructed from the allocator bAlloc.
  PDBLinker(SymbolTable *symtab)
      : symtab(symtab), builder(bAlloc), tMerger(bAlloc) {
    // This isn't strictly necessary, but link.exe usually puts an empty string
    // as the first "valid" string in the string table, so we do the same in
    // order to maintain as much byte-for-byte compatibility as possible.
    pdbStrTab.insert("");
  }

  /// Emit the basic PDB structure: initial streams, headers, etc.
  void initialize(llvm::codeview::DebugInfo *buildId);

  /// Add natvis files specified on the command line.
  void addNatvisFiles();

  /// Add named streams specified on the command line.
  void addNamedStreams();

  /// Link CodeView from each object file in the symbol table into the PDB.
  void addObjectsToPDB();

  /// Add every live, defined public symbol to the PDB.
  void addPublicsToPDB();

  /// Link info for each import file in the symbol table into the PDB.
  void addImportFilesToPDB(ArrayRef<OutputSection *> outputSections);

  /// Link CodeView from a single object file into the target (output) PDB.
  /// When a precompiled headers object is linked, its TPI map might be provided
  /// externally.
  void addDebug(TpiSource *source);

  // NOTE(review): the definition is not visible in this part of the file;
  // presumably this merges the type records of `source` and returns the index
  // map to use for it, with `localMap` as caller-provided storage -- confirm.
  const CVIndexMap *mergeTypeRecords(TpiSource *source, CVIndexMap *localMap);

  // NOTE(review): the definition is not visible in this part of the file;
  // presumably this processes the debug symbol sections of `file` using
  // `indexMap` to remap type indices -- confirm.
  void addDebugSymbols(ObjFile *file, const CVIndexMap *indexMap);

  /// Merge the symbol records in \p symData, which come from \p file, into
  /// the PDB. Type indices in the records are remapped through \p indexMap,
  /// and pointers to string table references found in the records are
  /// appended to \p stringTableRefs so that they can be rewritten later.
  void mergeSymbolRecords(ObjFile *file, const CVIndexMap &indexMap,
                          std::vector<ulittle32_t *> &stringTableRefs,
                          BinaryStreamRef symData);

  /// Add the section map and section contributions to the PDB.
  void addSections(ArrayRef<OutputSection *> outputSections,
                   ArrayRef<uint8_t> sectionTable);

  /// Write the PDB to disk and store the Guid generated for it in *guid.
  void commit(codeview::GUID *guid);

  // Print statistics regarding the final PDB
  void printStats();

private:
  /// The symbol table passed to the constructor.
  SymbolTable *symtab;

  /// Builder for the output PDB file.
  pdb::PDBFileBuilder builder;

  /// Type merger; its ID table is consulted when translating S_*_ID symbols.
  TypeMerger tMerger;

  /// PDBs use a single global string table for filenames in the file checksum
  /// table.
  DebugStringTableSubsection pdbStrTab;

  // NOTE(review): not referenced in the visible part of the file; presumably
  // scratch storage for a path in native form -- confirm.
  llvm::SmallString<128> nativePath;

  // For statistics
  uint64_t globalSymbols = 0;
  uint64_t moduleSymbols = 0;
  uint64_t publicSymbols = 0;
};
151
/// Processes the .debug$S sections of a single object file. handleDebugS() is
/// given the relocated contents of one section and dispatches on each
/// subsection it contains; finish() then performs the work that has to wait
/// until every subsection has been seen, such as rewriting string table
/// references.
class DebugSHandler {
  /// The linker whose PDB builder and string table receive the merged data.
  PDBLinker &linker;

  /// The object file whose .debug$S sections we're processing.
  ObjFile &file;

  /// The result of merging type indices.
  const CVIndexMap *indexMap;

  /// The DEBUG_S_STRINGTABLE subsection.  These strings are referred to by
  /// index from other records in the .debug$S section.  All of these strings
  /// need to be added to the global PDB string table, and all references to
  /// these strings need to have their indices re-written to refer to the
  /// global PDB string table.
  DebugStringTableSubsectionRef cvStrTab;

  /// The DEBUG_S_FILECHKSMS subsection.  As above, these are referred to
  /// by other records in the .debug$S section and need to be merged into the
  /// PDB.
  DebugChecksumsSubsectionRef checksums;

  /// The DEBUG_S_FRAMEDATA subsection(s).  There can be more than one of
  /// these and they need not appear in any specific order.  However, they
  /// contain string table references which need to be re-written, so we
  /// collect them all here and re-write them after all subsections have been
  /// discovered and processed.
  std::vector<DebugFrameDataSubsectionRef> newFpoFrames;

  /// Pointers to raw memory that we determine have string table references
  /// that need to be re-written.  We first process all .debug$S subsections
  /// to ensure that we can handle subsections written in any order, building
  /// up this list as we go.  At the end, we use the string table (which must
  /// have been discovered by now else it is an error) to re-write these
  /// references.
  std::vector<ulittle32_t *> stringTableReferences;

  /// Remap the inlinee indices of an inlinee lines subsection in place and
  /// add the subsection to the module of the object file.
  void mergeInlineeLines(const DebugSubsectionRecord &inlineeLines);

public:
  /// \p indexMap may be null; handleDebugS() substitutes an empty map then.
  DebugSHandler(PDBLinker &linker, ObjFile &file, const CVIndexMap *indexMap)
      : linker(linker), file(file), indexMap(indexMap) {}

  /// Process the subsections of one relocated .debug$S section.
  void handleDebugS(ArrayRef<uint8_t> relocatedDebugContents);

  /// Complete the processing that depends on having seen all subsections.
  void finish();
};
198}
199
200// Visual Studio's debugger requires absolute paths in various places in the
201// PDB to work without additional configuration:
202// https://docs.microsoft.com/en-us/visualstudio/debugger/debug-source-files-common-properties-solution-property-pages-dialog-box
203static void pdbMakeAbsolute(SmallVectorImpl<char> &fileName) {
204  // The default behavior is to produce paths that are valid within the context
205  // of the machine that you perform the link on.  If the linker is running on
206  // a POSIX system, we will output absolute POSIX paths.  If the linker is
207  // running on a Windows system, we will output absolute Windows paths.  If the
208  // user desires any other kind of behavior, they should explicitly pass
209  // /pdbsourcepath, in which case we will treat the exact string the user
210  // passed in as the gospel and not normalize, canonicalize it.
211  if (sys::path::is_absolute(fileName, sys::path::Style::windows) ||
212      sys::path::is_absolute(fileName, sys::path::Style::posix))
213    return;
214
215  // It's not absolute in any path syntax.  Relative paths necessarily refer to
216  // the local file system, so we can make it native without ending up with a
217  // nonsensical path.
218  if (config->pdbSourcePath.empty()) {
219    sys::path::native(fileName);
220    sys::fs::make_absolute(fileName);
221    return;
222  }
223
224  // Try to guess whether /PDBSOURCEPATH is a unix path or a windows path.
225  // Since PDB's are more of a Windows thing, we make this conservative and only
226  // decide that it's a unix path if we're fairly certain.  Specifically, if
227  // it starts with a forward slash.
228  SmallString<128> absoluteFileName = config->pdbSourcePath;
229  sys::path::Style guessedStyle = absoluteFileName.startswith("/")
230                                      ? sys::path::Style::posix
231                                      : sys::path::Style::windows;
232  sys::path::append(absoluteFileName, guessedStyle, fileName);
233  sys::path::native(absoluteFileName, guessedStyle);
234  sys::path::remove_dots(absoluteFileName, true, guessedStyle);
235
236  fileName = std::move(absoluteFileName);
237}
238
239static void addTypeInfo(pdb::TpiStreamBuilder &tpiBuilder,
240                        TypeCollection &typeTable) {
241  // Start the TPI or IPI stream header.
242  tpiBuilder.setVersionHeader(pdb::PdbTpiV80);
243
244  // Flatten the in memory type table and hash each type.
245  typeTable.ForEachRecord([&](TypeIndex ti, const CVType &type) {
246    auto hash = pdb::hashTypeRecord(type);
247    if (auto e = hash.takeError())
248      fatal("type hashing error");
249    tpiBuilder.addTypeRecord(type.RecordData, *hash);
250  });
251}
252
253static bool remapTypeIndex(TypeIndex &ti, ArrayRef<TypeIndex> typeIndexMap) {
254  if (ti.isSimple())
255    return true;
256  if (ti.toArrayIndex() >= typeIndexMap.size())
257    return false;
258  ti = typeIndexMap[ti.toArrayIndex()];
259  return true;
260}
261
262static void remapTypesInSymbolRecord(ObjFile *file, SymbolKind symKind,
263                                     MutableArrayRef<uint8_t> recordBytes,
264                                     const CVIndexMap &indexMap,
265                                     ArrayRef<TiReference> typeRefs) {
266  MutableArrayRef<uint8_t> contents =
267      recordBytes.drop_front(sizeof(RecordPrefix));
268  for (const TiReference &ref : typeRefs) {
269    unsigned byteSize = ref.Count * sizeof(TypeIndex);
270    if (contents.size() < ref.Offset + byteSize)
271      fatal("symbol record too short");
272
273    // This can be an item index or a type index. Choose the appropriate map.
274    ArrayRef<TypeIndex> typeOrItemMap = indexMap.tpiMap;
275    bool isItemIndex = ref.Kind == TiRefKind::IndexRef;
276    if (isItemIndex && indexMap.isTypeServerMap)
277      typeOrItemMap = indexMap.ipiMap;
278
279    MutableArrayRef<TypeIndex> tIs(
280        reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
281    for (TypeIndex &ti : tIs) {
282      if (!remapTypeIndex(ti, typeOrItemMap)) {
283        log("ignoring symbol record of kind 0x" + utohexstr(symKind) + " in " +
284            file->getName() + " with bad " + (isItemIndex ? "item" : "type") +
285            " index 0x" + utohexstr(ti.getIndex()));
286        ti = TypeIndex(SimpleTypeKind::NotTranslated);
287        continue;
288      }
289    }
290  }
291}
292
293static void
294recordStringTableReferenceAtOffset(MutableArrayRef<uint8_t> contents,
295                                   uint32_t offset,
296                                   std::vector<ulittle32_t *> &strTableRefs) {
297  contents =
298      contents.drop_front(offset).take_front(sizeof(support::ulittle32_t));
299  ulittle32_t *index = reinterpret_cast<ulittle32_t *>(contents.data());
300  strTableRefs.push_back(index);
301}
302
303static void
304recordStringTableReferences(SymbolKind kind, MutableArrayRef<uint8_t> contents,
305                            std::vector<ulittle32_t *> &strTableRefs) {
306  // For now we only handle S_FILESTATIC, but we may need the same logic for
307  // S_DEFRANGE and S_DEFRANGE_SUBFIELD.  However, I cannot seem to generate any
308  // PDBs that contain these types of records, so because of the uncertainty
309  // they are omitted here until we can prove that it's necessary.
310  switch (kind) {
311  case SymbolKind::S_FILESTATIC:
312    // FileStaticSym::ModFileOffset
313    recordStringTableReferenceAtOffset(contents, 8, strTableRefs);
314    break;
315  case SymbolKind::S_DEFRANGE:
316  case SymbolKind::S_DEFRANGE_SUBFIELD:
317    log("Not fixing up string table reference in S_DEFRANGE / "
318        "S_DEFRANGE_SUBFIELD record");
319    break;
320  default:
321    break;
322  }
323}
324
325static SymbolKind symbolKind(ArrayRef<uint8_t> recordData) {
326  const RecordPrefix *prefix =
327      reinterpret_cast<const RecordPrefix *>(recordData.data());
328  return static_cast<SymbolKind>(uint16_t(prefix->RecordKind));
329}
330
331/// MSVC translates S_PROC_ID_END to S_END, and S_[LG]PROC32_ID to S_[LG]PROC32
332static void translateIdSymbols(MutableArrayRef<uint8_t> &recordData,
333                               TypeCollection &idTable) {
334  RecordPrefix *prefix = reinterpret_cast<RecordPrefix *>(recordData.data());
335
336  SymbolKind kind = symbolKind(recordData);
337
338  if (kind == SymbolKind::S_PROC_ID_END) {
339    prefix->RecordKind = SymbolKind::S_END;
340    return;
341  }
342
343  // In an object file, GPROC32_ID has an embedded reference which refers to the
344  // single object file type index namespace.  This has already been translated
345  // to the PDB file's ID stream index space, but we need to convert this to a
346  // symbol that refers to the type stream index space.  So we remap again from
347  // ID index space to type index space.
348  if (kind == SymbolKind::S_GPROC32_ID || kind == SymbolKind::S_LPROC32_ID) {
349    SmallVector<TiReference, 1> refs;
350    auto content = recordData.drop_front(sizeof(RecordPrefix));
351    CVSymbol sym(recordData);
352    discoverTypeIndicesInSymbol(sym, refs);
353    assert(refs.size() == 1);
354    assert(refs.front().Count == 1);
355
356    TypeIndex *ti =
357        reinterpret_cast<TypeIndex *>(content.data() + refs[0].Offset);
358    // `ti` is the index of a FuncIdRecord or MemberFuncIdRecord which lives in
359    // the IPI stream, whose `FunctionType` member refers to the TPI stream.
360    // Note that LF_FUNC_ID and LF_MEMFUNC_ID have the same record layout, and
361    // in both cases we just need the second type index.
362    if (!ti->isSimple() && !ti->isNoneType()) {
363      CVType funcIdData = idTable.getType(*ti);
364      ArrayRef<uint8_t> tiBuf = funcIdData.data().slice(8, 4);
365      assert(tiBuf.size() == 4 && "corrupt LF_[MEM]FUNC_ID record");
366      *ti = *reinterpret_cast<const TypeIndex *>(tiBuf.data());
367    }
368
369    kind = (kind == SymbolKind::S_GPROC32_ID) ? SymbolKind::S_GPROC32
370                                              : SymbolKind::S_LPROC32;
371    prefix->RecordKind = uint16_t(kind);
372  }
373}
374
375/// Copy the symbol record. In a PDB, symbol records must be 4 byte aligned.
376/// The object file may not be aligned.
377static MutableArrayRef<uint8_t>
378copyAndAlignSymbol(const CVSymbol &sym, MutableArrayRef<uint8_t> &alignedMem) {
379  size_t size = alignTo(sym.length(), alignOf(CodeViewContainer::Pdb));
380  assert(size >= 4 && "record too short");
381  assert(size <= MaxRecordLength && "record too long");
382  assert(alignedMem.size() >= size && "didn't preallocate enough");
383
384  // Copy the symbol record and zero out any padding bytes.
385  MutableArrayRef<uint8_t> newData = alignedMem.take_front(size);
386  alignedMem = alignedMem.drop_front(size);
387  memcpy(newData.data(), sym.data().data(), sym.length());
388  memset(newData.data() + sym.length(), 0, size - sym.length());
389
390  // Update the record prefix length. It should point to the beginning of the
391  // next record.
392  auto *prefix = reinterpret_cast<RecordPrefix *>(newData.data());
393  prefix->RecordLen = size - 2;
394  return newData;
395}
396
/// The two fields that scopeStackOpen() and scopeStackClose() patch in a
/// symbol record that opens a scope. scopeStackOpen() overlays this struct on
/// the start of the record's content.
struct ScopeRecord {
  // Symbol offset of the enclosing scope, or 0 when there is none.
  ulittle32_t ptrParent;
  // Symbol offset that was current when the scope was closed.
  ulittle32_t ptrEnd;
};
401
/// One entry of the stack of currently open symbol scopes.
struct SymbolScope {
  // The patchable fields of the record that opened this scope.
  ScopeRecord *openingRecord;
  // Symbol offset at which the opening record was seen.
  uint32_t scopeOffset;
};
406
407static void scopeStackOpen(SmallVectorImpl<SymbolScope> &stack,
408                           uint32_t curOffset, CVSymbol &sym) {
409  assert(symbolOpensScope(sym.kind()));
410  SymbolScope s;
411  s.scopeOffset = curOffset;
412  s.openingRecord = const_cast<ScopeRecord *>(
413      reinterpret_cast<const ScopeRecord *>(sym.content().data()));
414  s.openingRecord->ptrParent = stack.empty() ? 0 : stack.back().scopeOffset;
415  stack.push_back(s);
416}
417
418static void scopeStackClose(SmallVectorImpl<SymbolScope> &stack,
419                            uint32_t curOffset, InputFile *file) {
420  if (stack.empty()) {
421    warn("symbol scopes are not balanced in " + file->getName());
422    return;
423  }
424  SymbolScope s = stack.pop_back_val();
425  s.openingRecord->ptrEnd = curOffset;
426}
427
428static bool symbolGoesInModuleStream(const CVSymbol &sym, bool isGlobalScope) {
429  switch (sym.kind()) {
430  case SymbolKind::S_GDATA32:
431  case SymbolKind::S_CONSTANT:
432  case SymbolKind::S_GTHREAD32:
433  // We really should not be seeing S_PROCREF and S_LPROCREF in the first place
434  // since they are synthesized by the linker in response to S_GPROC32 and
435  // S_LPROC32, but if we do see them, don't put them in the module stream I
436  // guess.
437  case SymbolKind::S_PROCREF:
438  case SymbolKind::S_LPROCREF:
439    return false;
440  // S_UDT records go in the module stream if it is not a global S_UDT.
441  case SymbolKind::S_UDT:
442    return !isGlobalScope;
443  // S_GDATA32 does not go in the module stream, but S_LDATA32 does.
444  case SymbolKind::S_LDATA32:
445  case SymbolKind::S_LTHREAD32:
446  default:
447    return true;
448  }
449}
450
451static bool symbolGoesInGlobalsStream(const CVSymbol &sym,
452                                      bool isFunctionScope) {
453  switch (sym.kind()) {
454  case SymbolKind::S_CONSTANT:
455  case SymbolKind::S_GDATA32:
456  case SymbolKind::S_GTHREAD32:
457  case SymbolKind::S_GPROC32:
458  case SymbolKind::S_LPROC32:
459  // We really should not be seeing S_PROCREF and S_LPROCREF in the first place
460  // since they are synthesized by the linker in response to S_GPROC32 and
461  // S_LPROC32, but if we do see them, copy them straight through.
462  case SymbolKind::S_PROCREF:
463  case SymbolKind::S_LPROCREF:
464    return true;
465  // Records that go in the globals stream, unless they are function-local.
466  case SymbolKind::S_UDT:
467  case SymbolKind::S_LDATA32:
468  case SymbolKind::S_LTHREAD32:
469    return !isFunctionScope;
470  default:
471    return false;
472  }
473}
474
475static void addGlobalSymbol(pdb::GSIStreamBuilder &builder, uint16_t modIndex,
476                            unsigned symOffset, const CVSymbol &sym) {
477  switch (sym.kind()) {
478  case SymbolKind::S_CONSTANT:
479  case SymbolKind::S_UDT:
480  case SymbolKind::S_GDATA32:
481  case SymbolKind::S_GTHREAD32:
482  case SymbolKind::S_LTHREAD32:
483  case SymbolKind::S_LDATA32:
484  case SymbolKind::S_PROCREF:
485  case SymbolKind::S_LPROCREF:
486    builder.addGlobalSymbol(sym);
487    break;
488  case SymbolKind::S_GPROC32:
489  case SymbolKind::S_LPROC32: {
490    SymbolRecordKind k = SymbolRecordKind::ProcRefSym;
491    if (sym.kind() == SymbolKind::S_LPROC32)
492      k = SymbolRecordKind::LocalProcRef;
493    ProcRefSym ps(k);
494    ps.Module = modIndex;
495    // For some reason, MSVC seems to add one to this value.
496    ++ps.Module;
497    ps.Name = getSymbolName(sym);
498    ps.SumName = 0;
499    ps.SymOffset = symOffset;
500    builder.addGlobalSymbol(ps);
501    break;
502  }
503  default:
504    llvm_unreachable("Invalid symbol kind!");
505  }
506}
507
/// Merge the symbol records of one symbols subsection into the PDB.
///
/// \param file            the object file the records come from; the records
///                        are added to its module, and its name is used in
///                        diagnostics.
/// \param indexMap        the map through which type and item indices in the
///                        records are translated.
/// \param stringTableRefs receives pointers to the string table references
///                        found in the records, to be rewritten later.
/// \param symData         the raw symbol records.
///
/// The records are walked twice: a first pass validates the record lengths
/// and computes how much memory realignment needs, and a second pass remaps,
/// translates and distributes each record to the globals stream and/or the
/// module stream. If the first pass finds corrupt records, a warning is
/// emitted and none of the records are merged.
void PDBLinker::mergeSymbolRecords(ObjFile *file, const CVIndexMap &indexMap,
                                   std::vector<ulittle32_t *> &stringTableRefs,
                                   BinaryStreamRef symData) {
  ArrayRef<uint8_t> symsBuffer;
  cantFail(symData.readBytes(0, symData.getLength(), symsBuffer));
  // Stack of the scopes that are open at the current record.
  SmallVector<SymbolScope, 4> scopes;

  // Iterate every symbol to check if any need to be realigned, and if so, how
  // much space we need to allocate for them.
  bool needsRealignment = false;
  unsigned totalRealignedSize = 0;
  auto ec = forEachCodeViewRecord<CVSymbol>(
      symsBuffer, [&](CVSymbol sym) -> llvm::Error {
        unsigned realignedSize =
            alignTo(sym.length(), alignOf(CodeViewContainer::Pdb));
        needsRealignment |= realignedSize != sym.length();
        totalRealignedSize += realignedSize;
        return Error::success();
      });

  // If any of the symbol record lengths was corrupt, ignore them all, warn
  // about it, and move on.
  if (ec) {
    warn("corrupt symbol records in " + file->getName());
    consumeError(std::move(ec));
    return;
  }

  // If any symbol needed realignment, allocate enough contiguous memory for
  // them all. Typically symbol subsections are small enough that this will not
  // cause fragmentation.
  MutableArrayRef<uint8_t> alignedSymbolMem;
  if (needsRealignment) {
    void *alignedData =
        bAlloc.Allocate(totalRealignedSize, alignOf(CodeViewContainer::Pdb));
    alignedSymbolMem = makeMutableArrayRef(
        reinterpret_cast<uint8_t *>(alignedData), totalRealignedSize);
  }

  // Iterate again, this time doing the real work.
  // curSymOffset tracks the offset that the next record added to the module
  // will have; bulkSymbols is the contiguous run of records that has been
  // accumulated but not yet handed to the module.
  unsigned curSymOffset = file->moduleDBI->getNextSymbolOffset();
  ArrayRef<uint8_t> bulkSymbols;
  cantFail(forEachCodeViewRecord<CVSymbol>(
      symsBuffer, [&](CVSymbol sym) -> llvm::Error {
        // Align the record if required.
        MutableArrayRef<uint8_t> recordBytes;
        if (needsRealignment) {
          recordBytes = copyAndAlignSymbol(sym, alignedSymbolMem);
          sym = CVSymbol(recordBytes);
        } else {
          // Otherwise, we can actually mutate the symbol directly, since we
          // copied it to apply relocations.
          recordBytes = makeMutableArrayRef(
              const_cast<uint8_t *>(sym.data().data()), sym.length());
        }

        // Discover type index references in the record. Skip it if we don't
        // know where they are.
        SmallVector<TiReference, 32> typeRefs;
        if (!discoverTypeIndicesInSymbol(sym, typeRefs)) {
          log("ignoring unknown symbol record with kind 0x" +
              utohexstr(sym.kind()));
          return Error::success();
        }

        // Re-map all the type index references.
        remapTypesInSymbolRecord(file, sym.kind(), recordBytes, indexMap,
                                 typeRefs);

        // An object file may have S_xxx_ID symbols, but these get converted to
        // "real" symbols in a PDB.
        translateIdSymbols(recordBytes, tMerger.getIDTable());
        // Re-wrap the bytes so that sym.kind() reflects the translated kind.
        sym = CVSymbol(recordBytes);

        // If this record refers to an offset in the object file's string table,
        // add that item to the global PDB string table and re-write the index.
        recordStringTableReferences(sym.kind(), recordBytes, stringTableRefs);

        // Fill in "Parent" and "End" fields by maintaining a stack of scopes.
        if (symbolOpensScope(sym.kind()))
          scopeStackOpen(scopes, curSymOffset, sym);
        else if (symbolEndsScope(sym.kind()))
          scopeStackClose(scopes, curSymOffset, file);

        // Add the symbol to the globals stream if necessary.  Do this before
        // adding the symbol to the module since we may need to get the next
        // symbol offset, and writing to the module's symbol stream will update
        // that offset.
        if (symbolGoesInGlobalsStream(sym, !scopes.empty())) {
          addGlobalSymbol(builder.getGsiBuilder(),
                          file->moduleDBI->getModuleIndex(), curSymOffset, sym);
          ++globalSymbols;
        }

        if (symbolGoesInModuleStream(sym, scopes.empty())) {
          // Add symbols to the module in bulk. If this symbol is contiguous
          // with the previous run of symbols to add, combine the ranges. If
          // not, close the previous range of symbols and start a new one.
          if (sym.data().data() == bulkSymbols.end()) {
            bulkSymbols = makeArrayRef(bulkSymbols.data(),
                                       bulkSymbols.size() + sym.length());
          } else {
            file->moduleDBI->addSymbolsInBulk(bulkSymbols);
            bulkSymbols = recordBytes;
          }
          // Only records that go in the module advance the module offset.
          curSymOffset += sym.length();
          ++moduleSymbols;
        }
        return Error::success();
      }));

  // Add any remaining symbols we've accumulated.
  file->moduleDBI->addSymbolsInBulk(bulkSymbols);
}
622
623static pdb::SectionContrib createSectionContrib(const Chunk *c, uint32_t modi) {
624  OutputSection *os = c ? c->getOutputSection() : nullptr;
625  pdb::SectionContrib sc;
626  memset(&sc, 0, sizeof(sc));
627  sc.ISect = os ? os->sectionIndex : llvm::pdb::kInvalidStreamIndex;
628  sc.Off = c && os ? c->getRVA() - os->getRVA() : 0;
629  sc.Size = c ? c->getSize() : -1;
630  if (auto *secChunk = dyn_cast_or_null<SectionChunk>(c)) {
631    sc.Characteristics = secChunk->header->Characteristics;
632    sc.Imod = secChunk->file->moduleDBI->getModuleIndex();
633    ArrayRef<uint8_t> contents = secChunk->getContents();
634    JamCRC crc(0);
635    crc.update(contents);
636    sc.DataCrc = crc.getCRC();
637  } else {
638    sc.Characteristics = os ? os->header.Characteristics : 0;
639    sc.Imod = modi;
640  }
641  sc.RelocCrc = 0; // FIXME
642
643  return sc;
644}
645
646static uint32_t
647translateStringTableIndex(uint32_t objIndex,
648                          const DebugStringTableSubsectionRef &objStrTable,
649                          DebugStringTableSubsection &pdbStrTable) {
650  auto expectedString = objStrTable.getString(objIndex);
651  if (!expectedString) {
652    warn("Invalid string table reference");
653    consumeError(expectedString.takeError());
654    return 0;
655  }
656
657  return pdbStrTable.insert(*expectedString);
658}
659
660void DebugSHandler::handleDebugS(ArrayRef<uint8_t> relocatedDebugContents) {
661  relocatedDebugContents =
662      SectionChunk::consumeDebugMagic(relocatedDebugContents, ".debug$S");
663
664  DebugSubsectionArray subsections;
665  BinaryStreamReader reader(relocatedDebugContents, support::little);
666  exitOnErr(reader.readArray(subsections, relocatedDebugContents.size()));
667
668  // If there is no index map, use an empty one.
669  CVIndexMap tempIndexMap;
670  if (!indexMap)
671    indexMap = &tempIndexMap;
672
673  for (const DebugSubsectionRecord &ss : subsections) {
674    // Ignore subsections with the 'ignore' bit. Some versions of the Visual C++
675    // runtime have subsections with this bit set.
676    if (uint32_t(ss.kind()) & codeview::SubsectionIgnoreFlag)
677      continue;
678
679    switch (ss.kind()) {
680    case DebugSubsectionKind::StringTable: {
681      assert(!cvStrTab.valid() &&
682             "Encountered multiple string table subsections!");
683      exitOnErr(cvStrTab.initialize(ss.getRecordData()));
684      break;
685    }
686    case DebugSubsectionKind::FileChecksums:
687      assert(!checksums.valid() &&
688             "Encountered multiple checksum subsections!");
689      exitOnErr(checksums.initialize(ss.getRecordData()));
690      break;
691    case DebugSubsectionKind::Lines:
692      // We can add the relocated line table directly to the PDB without
693      // modification because the file checksum offsets will stay the same.
694      file.moduleDBI->addDebugSubsection(ss);
695      break;
696    case DebugSubsectionKind::InlineeLines:
697      // The inlinee lines subsection also has file checksum table references
698      // that can be used directly, but it contains function id references that
699      // must be remapped.
700      mergeInlineeLines(ss);
701      break;
702    case DebugSubsectionKind::FrameData: {
703      // We need to re-write string table indices here, so save off all
704      // frame data subsections until we've processed the entire list of
705      // subsections so that we can be sure we have the string table.
706      DebugFrameDataSubsectionRef fds;
707      exitOnErr(fds.initialize(ss.getRecordData()));
708      newFpoFrames.push_back(std::move(fds));
709      break;
710    }
711    case DebugSubsectionKind::Symbols: {
712      linker.mergeSymbolRecords(&file, *indexMap, stringTableReferences,
713                                ss.getRecordData());
714      break;
715    }
716
717    case DebugSubsectionKind::CrossScopeImports:
718    case DebugSubsectionKind::CrossScopeExports:
719      // These appear to relate to cross-module optimization, so we might use
720      // these for ThinLTO.
721      break;
722
723    case DebugSubsectionKind::ILLines:
724    case DebugSubsectionKind::FuncMDTokenMap:
725    case DebugSubsectionKind::TypeMDTokenMap:
726    case DebugSubsectionKind::MergedAssemblyInput:
727      // These appear to relate to .Net assembly info.
728      break;
729
730    case DebugSubsectionKind::CoffSymbolRVA:
731      // Unclear what this is for.
732      break;
733
734    default:
735      warn("ignoring unknown debug$S subsection kind 0x" +
736           utohexstr(uint32_t(ss.kind())) + " in file " + toString(&file));
737      break;
738    }
739  }
740}
741
742static Expected<StringRef>
743getFileName(const DebugStringTableSubsectionRef &strings,
744            const DebugChecksumsSubsectionRef &checksums, uint32_t fileID) {
745  auto iter = checksums.getArray().at(fileID);
746  if (iter == checksums.getArray().end())
747    return make_error<CodeViewError>(cv_error_code::no_records);
748  uint32_t offset = iter->FileNameOffset;
749  return strings.getString(offset);
750}
751
752void DebugSHandler::mergeInlineeLines(
753    const DebugSubsectionRecord &inlineeSubsection) {
754  DebugInlineeLinesSubsectionRef inlineeLines;
755  exitOnErr(inlineeLines.initialize(inlineeSubsection.getRecordData()));
756
757  // Remap type indices in inlinee line records in place.
758  for (const InlineeSourceLine &line : inlineeLines) {
759    TypeIndex &inlinee = *const_cast<TypeIndex *>(&line.Header->Inlinee);
760    ArrayRef<TypeIndex> typeOrItemMap =
761        indexMap->isTypeServerMap ? indexMap->ipiMap : indexMap->tpiMap;
762    if (!remapTypeIndex(inlinee, typeOrItemMap)) {
763      log("bad inlinee line record in " + file.getName() +
764          " with bad inlinee index 0x" + utohexstr(inlinee.getIndex()));
765    }
766  }
767
768  // Add the modified inlinee line subsection directly.
769  file.moduleDBI->addDebugSubsection(inlineeSubsection);
770}
771
772void DebugSHandler::finish() {
773  pdb::DbiStreamBuilder &dbiBuilder = linker.builder.getDbiBuilder();
774
775  // We should have seen all debug subsections across the entire object file now
776  // which means that if a StringTable subsection and Checksums subsection were
777  // present, now is the time to handle them.
778  if (!cvStrTab.valid()) {
779    if (checksums.valid())
780      fatal(".debug$S sections with a checksums subsection must also contain a "
781            "string table subsection");
782
783    if (!stringTableReferences.empty())
784      warn("No StringTable subsection was encountered, but there are string "
785           "table references");
786    return;
787  }
788
789  // Rewrite string table indices in the Fpo Data and symbol records to refer to
790  // the global PDB string table instead of the object file string table.
791  for (DebugFrameDataSubsectionRef &fds : newFpoFrames) {
792    const ulittle32_t *reloc = fds.getRelocPtr();
793    for (codeview::FrameData fd : fds) {
794      fd.RvaStart += *reloc;
795      fd.FrameFunc =
796          translateStringTableIndex(fd.FrameFunc, cvStrTab, linker.pdbStrTab);
797      dbiBuilder.addNewFpoData(fd);
798    }
799  }
800
801  for (ulittle32_t *ref : stringTableReferences)
802    *ref = translateStringTableIndex(*ref, cvStrTab, linker.pdbStrTab);
803
804  // Make a new file checksum table that refers to offsets in the PDB-wide
805  // string table. Generally the string table subsection appears after the
806  // checksum table, so we have to do this after looping over all the
807  // subsections. The new checksum table must have the exact same layout and
808  // size as the original. Otherwise, the file references in the line and
809  // inlinee line tables will be incorrect.
810  auto newChecksums = std::make_unique<DebugChecksumsSubsection>(linker.pdbStrTab);
811  for (FileChecksumEntry &fc : checksums) {
812    SmallString<128> filename =
813        exitOnErr(cvStrTab.getString(fc.FileNameOffset));
814    pdbMakeAbsolute(filename);
815    exitOnErr(dbiBuilder.addModuleSourceFile(*file.moduleDBI, filename));
816    newChecksums->addChecksum(filename, fc.Kind, fc.Checksum);
817  }
818  assert(checksums.getArray().getUnderlyingStream().getLength() ==
819             newChecksums->calculateSerializedSize() &&
820         "file checksum table must have same layout");
821
822  file.moduleDBI->addDebugSubsection(std::move(newChecksums));
823}
824
825static void warnUnusable(InputFile *f, Error e) {
826  if (!config->warnDebugInfoUnusable) {
827    consumeError(std::move(e));
828    return;
829  }
830  auto msg = "Cannot use debug info for '" + toString(f) + "' [LNK4099]";
831  if (e)
832    warn(msg + "\n>>> failed to load reference " + toString(std::move(e)));
833  else
834    warn(msg);
835}
836
837const CVIndexMap *PDBLinker::mergeTypeRecords(TpiSource *source,
838                                              CVIndexMap *localMap) {
839  ScopedTimer t(typeMergingTimer);
840  // Before we can process symbol substreams from .debug$S, we need to process
841  // type information, file checksums, and the string table.  Add type info to
842  // the PDB first, so that we can get the map from object file type and item
843  // indices to PDB type and item indices.
844  Expected<const CVIndexMap *> r = source->mergeDebugT(&tMerger, localMap);
845
846  // If the .debug$T sections fail to merge, assume there is no debug info.
847  if (!r) {
848    warnUnusable(source->file, r.takeError());
849    return nullptr;
850  }
851  return *r;
852}
853
854// Allocate memory for a .debug$S / .debug$F section and relocate it.
855static ArrayRef<uint8_t> relocateDebugChunk(SectionChunk &debugChunk) {
856  uint8_t *buffer = bAlloc.Allocate<uint8_t>(debugChunk.getSize());
857  assert(debugChunk.getOutputSectionIdx() == 0 &&
858         "debug sections should not be in output sections");
859  debugChunk.writeTo(buffer);
860  return makeArrayRef(buffer, debugChunk.getSize());
861}
862
863void PDBLinker::addDebugSymbols(ObjFile *file, const CVIndexMap *indexMap) {
864  ScopedTimer t(symbolMergingTimer);
865  pdb::DbiStreamBuilder &dbiBuilder = builder.getDbiBuilder();
866  DebugSHandler dsh(*this, *file, indexMap);
867  // Now do all live .debug$S and .debug$F sections.
868  for (SectionChunk *debugChunk : file->getDebugChunks()) {
869    if (!debugChunk->live || debugChunk->getSize() == 0)
870      continue;
871
872    bool isDebugS = debugChunk->getSectionName() == ".debug$S";
873    bool isDebugF = debugChunk->getSectionName() == ".debug$F";
874    if (!isDebugS && !isDebugF)
875      continue;
876
877    ArrayRef<uint8_t> relocatedDebugContents = relocateDebugChunk(*debugChunk);
878
879    if (isDebugS) {
880      dsh.handleDebugS(relocatedDebugContents);
881    } else if (isDebugF) {
882      FixedStreamArray<object::FpoData> fpoRecords;
883      BinaryStreamReader reader(relocatedDebugContents, support::little);
884      uint32_t count = relocatedDebugContents.size() / sizeof(object::FpoData);
885      exitOnErr(reader.readArray(fpoRecords, count));
886
887      // These are already relocated and don't refer to the string table, so we
888      // can just copy it.
889      for (const object::FpoData &fd : fpoRecords)
890        dbiBuilder.addOldFpoData(fd);
891    }
892  }
893
894  // Do any post-processing now that all .debug$S sections have been processed.
895  dsh.finish();
896}
897
898// Add a module descriptor for every object file. We need to put an absolute
899// path to the object into the PDB. If this is a plain object, we make its
900// path absolute. If it's an object in an archive, we make the archive path
901// absolute.
902static void createModuleDBI(pdb::PDBFileBuilder &builder, ObjFile *file) {
903  pdb::DbiStreamBuilder &dbiBuilder = builder.getDbiBuilder();
904  SmallString<128> objName;
905
906  bool inArchive = !file->parentName.empty();
907  objName = inArchive ? file->parentName : file->getName();
908  pdbMakeAbsolute(objName);
909  StringRef modName = inArchive ? file->getName() : StringRef(objName);
910
911  file->moduleDBI = &exitOnErr(dbiBuilder.addModuleInfo(modName));
912  file->moduleDBI->setObjFileName(objName);
913
914  ArrayRef<Chunk *> chunks = file->getChunks();
915  uint32_t modi = file->moduleDBI->getModuleIndex();
916
917  for (Chunk *c : chunks) {
918    auto *secChunk = dyn_cast<SectionChunk>(c);
919    if (!secChunk || !secChunk->live)
920      continue;
921    pdb::SectionContrib sc = createSectionContrib(secChunk, modi);
922    file->moduleDBI->setFirstSectionContrib(sc);
923    break;
924  }
925}
926
927void PDBLinker::addDebug(TpiSource *source) {
928  CVIndexMap localMap;
929  const CVIndexMap *indexMap = mergeTypeRecords(source, &localMap);
930
931  if (source->kind == TpiSource::PDB)
932    return; // No symbols in TypeServer PDBs
933
934  addDebugSymbols(source->file, indexMap);
935}
936
937static pdb::BulkPublic createPublic(Defined *def) {
938  pdb::BulkPublic pub;
939  pub.Name = def->getName().data();
940  pub.NameLen = def->getName().size();
941
942  PublicSymFlags flags = PublicSymFlags::None;
943  if (auto *d = dyn_cast<DefinedCOFF>(def)) {
944    if (d->getCOFFSymbol().isFunctionDefinition())
945      flags = PublicSymFlags::Function;
946  } else if (isa<DefinedImportThunk>(def)) {
947    flags = PublicSymFlags::Function;
948  }
949  pub.setFlags(flags);
950
951  OutputSection *os = def->getChunk()->getOutputSection();
952  assert(os && "all publics should be in final image");
953  pub.Offset = def->getRVA() - os->getRVA();
954  pub.Segment = os->sectionIndex;
955  return pub;
956}
957
958// Add all object files to the PDB. Merge .debug$T sections into IpiData and
959// TpiData.
960void PDBLinker::addObjectsToPDB() {
961  ScopedTimer t1(addObjectsTimer);
962
963  // Create module descriptors
964  for_each(ObjFile::instances,
965           [&](ObjFile *obj) { createModuleDBI(builder, obj); });
966
967  // Merge OBJs that do not have debug types
968  for_each(ObjFile::instances, [&](ObjFile *obj) {
969    if (obj->debugTypesObj)
970      return;
971    // Even if there're no types, still merge non-symbol .Debug$S and .Debug$F
972    // sections
973    addDebugSymbols(obj, nullptr);
974  });
975
976  // Merge dependencies
977  TpiSource::forEachSource([&](TpiSource *source) {
978    if (source->isDependency())
979      addDebug(source);
980  });
981
982  // Merge regular and dependent OBJs
983  TpiSource::forEachSource([&](TpiSource *source) {
984    if (!source->isDependency())
985      addDebug(source);
986  });
987
988  builder.getStringTableBuilder().setStrings(pdbStrTab);
989  t1.stop();
990
991  // Construct TPI and IPI stream contents.
992  ScopedTimer t2(tpiStreamLayoutTimer);
993  addTypeInfo(builder.getTpiBuilder(), tMerger.getTypeTable());
994  addTypeInfo(builder.getIpiBuilder(), tMerger.getIDTable());
995  t2.stop();
996}
997
998void PDBLinker::addPublicsToPDB() {
999  ScopedTimer t3(publicsLayoutTimer);
1000  // Compute the public symbols.
1001  auto &gsiBuilder = builder.getGsiBuilder();
1002  std::vector<pdb::BulkPublic> publics;
1003  symtab->forEachSymbol([&publics](Symbol *s) {
1004    // Only emit external, defined, live symbols that have a chunk. Static,
1005    // non-external symbols do not appear in the symbol table.
1006    auto *def = dyn_cast<Defined>(s);
1007    if (def && def->isLive() && def->getChunk())
1008      publics.push_back(createPublic(def));
1009  });
1010
1011  if (!publics.empty()) {
1012    publicSymbols = publics.size();
1013    gsiBuilder.addPublicSymbols(std::move(publics));
1014  }
1015}
1016
1017void PDBLinker::printStats() {
1018  if (!config->showSummary)
1019    return;
1020
1021  SmallString<256> buffer;
1022  raw_svector_ostream stream(buffer);
1023
1024  stream << center_justify("Summary", 80) << '\n'
1025         << std::string(80, '-') << '\n';
1026
1027  auto print = [&](uint64_t v, StringRef s) {
1028    stream << format_decimal(v, 15) << " " << s << '\n';
1029  };
1030
1031  print(ObjFile::instances.size(),
1032        "Input OBJ files (expanded from all cmd-line inputs)");
1033  print(TpiSource::countTypeServerPDBs(), "PDB type server dependencies");
1034  print(TpiSource::countPrecompObjs(), "Precomp OBJ dependencies");
1035  print(tMerger.getTypeTable().size() + tMerger.getIDTable().size(),
1036        "Merged TPI records");
1037  print(pdbStrTab.size(), "Output PDB strings");
1038  print(globalSymbols, "Global symbol records");
1039  print(moduleSymbols, "Module symbol records");
1040  print(publicSymbols, "Public symbol records");
1041
1042  auto printLargeInputTypeRecs = [&](StringRef name,
1043                                     ArrayRef<uint32_t> recCounts,
1044                                     TypeCollection &records) {
1045    // Figure out which type indices were responsible for the most duplicate
1046    // bytes in the input files. These should be frequently emitted LF_CLASS and
1047    // LF_FIELDLIST records.
1048    struct TypeSizeInfo {
1049      uint32_t typeSize;
1050      uint32_t dupCount;
1051      TypeIndex typeIndex;
1052      uint64_t totalInputSize() const { return uint64_t(dupCount) * typeSize; }
1053      bool operator<(const TypeSizeInfo &rhs) const {
1054        if (totalInputSize() == rhs.totalInputSize())
1055          return typeIndex < rhs.typeIndex;
1056        return totalInputSize() < rhs.totalInputSize();
1057      }
1058    };
1059    SmallVector<TypeSizeInfo, 0> tsis;
1060    for (auto e : enumerate(recCounts)) {
1061      TypeIndex typeIndex = TypeIndex::fromArrayIndex(e.index());
1062      uint32_t typeSize = records.getType(typeIndex).length();
1063      uint32_t dupCount = e.value();
1064      tsis.push_back({typeSize, dupCount, typeIndex});
1065    }
1066
1067    if (!tsis.empty()) {
1068      stream << "\nTop 10 types responsible for the most " << name
1069             << " input:\n";
1070      stream << "       index     total bytes   count     size\n";
1071      llvm::sort(tsis);
1072      unsigned i = 0;
1073      for (const auto &tsi : reverse(tsis)) {
1074        stream << formatv("  {0,10:X}: {1,14:N} = {2,5:N} * {3,6:N}\n",
1075                          tsi.typeIndex.getIndex(), tsi.totalInputSize(),
1076                          tsi.dupCount, tsi.typeSize);
1077        if (++i >= 10)
1078          break;
1079      }
1080      stream
1081          << "Run llvm-pdbutil to print details about a particular record:\n";
1082      stream << formatv("llvm-pdbutil dump -{0}s -{0}-index {1:X} {2}\n",
1083                        (name == "TPI" ? "type" : "id"),
1084                        tsis.back().typeIndex.getIndex(), config->pdbPath);
1085    }
1086  };
1087
1088  printLargeInputTypeRecs("TPI", tMerger.tpiCounts, tMerger.getTypeTable());
1089  printLargeInputTypeRecs("IPI", tMerger.ipiCounts, tMerger.getIDTable());
1090
1091  message(buffer);
1092}
1093
1094void PDBLinker::addNatvisFiles() {
1095  for (StringRef file : config->natvisFiles) {
1096    ErrorOr<std::unique_ptr<MemoryBuffer>> dataOrErr =
1097        MemoryBuffer::getFile(file);
1098    if (!dataOrErr) {
1099      warn("Cannot open input file: " + file);
1100      continue;
1101    }
1102    builder.addInjectedSource(file, std::move(*dataOrErr));
1103  }
1104}
1105
1106void PDBLinker::addNamedStreams() {
1107  for (const auto &streamFile : config->namedStreams) {
1108    const StringRef stream = streamFile.getKey(), file = streamFile.getValue();
1109    ErrorOr<std::unique_ptr<MemoryBuffer>> dataOrErr =
1110        MemoryBuffer::getFile(file);
1111    if (!dataOrErr) {
1112      warn("Cannot open input file: " + file);
1113      continue;
1114    }
1115    exitOnErr(builder.addNamedStream(stream, (*dataOrErr)->getBuffer()));
1116  }
1117}
1118
1119static codeview::CPUType toCodeViewMachine(COFF::MachineTypes machine) {
1120  switch (machine) {
1121  case COFF::IMAGE_FILE_MACHINE_AMD64:
1122    return codeview::CPUType::X64;
1123  case COFF::IMAGE_FILE_MACHINE_ARM:
1124    return codeview::CPUType::ARM7;
1125  case COFF::IMAGE_FILE_MACHINE_ARM64:
1126    return codeview::CPUType::ARM64;
1127  case COFF::IMAGE_FILE_MACHINE_ARMNT:
1128    return codeview::CPUType::ARMNT;
1129  case COFF::IMAGE_FILE_MACHINE_I386:
1130    return codeview::CPUType::Intel80386;
1131  default:
1132    llvm_unreachable("Unsupported CPU Type");
1133  }
1134}
1135
1136// Mimic MSVC which surrounds arguments containing whitespace with quotes.
1137// Double double-quotes are handled, so that the resulting string can be
1138// executed again on the cmd-line.
1139static std::string quote(ArrayRef<StringRef> args) {
1140  std::string r;
1141  r.reserve(256);
1142  for (StringRef a : args) {
1143    if (!r.empty())
1144      r.push_back(' ');
1145    bool hasWS = a.find(' ') != StringRef::npos;
1146    bool hasQ = a.find('"') != StringRef::npos;
1147    if (hasWS || hasQ)
1148      r.push_back('"');
1149    if (hasQ) {
1150      SmallVector<StringRef, 4> s;
1151      a.split(s, '"');
1152      r.append(join(s, "\"\""));
1153    } else {
1154      r.append(std::string(a));
1155    }
1156    if (hasWS || hasQ)
1157      r.push_back('"');
1158  }
1159  return r;
1160}
1161
1162static void fillLinkerVerRecord(Compile3Sym &cs) {
1163  cs.Machine = toCodeViewMachine(config->machine);
1164  // Interestingly, if we set the string to 0.0.0.0, then when trying to view
1165  // local variables WinDbg emits an error that private symbols are not present.
1166  // By setting this to a valid MSVC linker version string, local variables are
1167  // displayed properly.   As such, even though it is not representative of
1168  // LLVM's version information, we need this for compatibility.
1169  cs.Flags = CompileSym3Flags::None;
1170  cs.VersionBackendBuild = 25019;
1171  cs.VersionBackendMajor = 14;
1172  cs.VersionBackendMinor = 10;
1173  cs.VersionBackendQFE = 0;
1174
1175  // MSVC also sets the frontend to 0.0.0.0 since this is specifically for the
1176  // linker module (which is by definition a backend), so we don't need to do
1177  // anything here.  Also, it seems we can use "LLVM Linker" for the linker name
1178  // without any problems.  Only the backend version has to be hardcoded to a
1179  // magic number.
1180  cs.VersionFrontendBuild = 0;
1181  cs.VersionFrontendMajor = 0;
1182  cs.VersionFrontendMinor = 0;
1183  cs.VersionFrontendQFE = 0;
1184  cs.Version = "LLVM Linker";
1185  cs.setLanguage(SourceLanguage::Link);
1186}
1187
1188static void addCommonLinkerModuleSymbols(StringRef path,
1189                                         pdb::DbiModuleDescriptorBuilder &mod) {
1190  ObjNameSym ons(SymbolRecordKind::ObjNameSym);
1191  EnvBlockSym ebs(SymbolRecordKind::EnvBlockSym);
1192  Compile3Sym cs(SymbolRecordKind::Compile3Sym);
1193  fillLinkerVerRecord(cs);
1194
1195  ons.Name = "* Linker *";
1196  ons.Signature = 0;
1197
1198  ArrayRef<StringRef> args = makeArrayRef(config->argv).drop_front();
1199  std::string argStr = quote(args);
1200  ebs.Fields.push_back("cwd");
1201  SmallString<64> cwd;
1202  if (config->pdbSourcePath.empty())
1203    sys::fs::current_path(cwd);
1204  else
1205    cwd = config->pdbSourcePath;
1206  ebs.Fields.push_back(cwd);
1207  ebs.Fields.push_back("exe");
1208  SmallString<64> exe = config->argv[0];
1209  pdbMakeAbsolute(exe);
1210  ebs.Fields.push_back(exe);
1211  ebs.Fields.push_back("pdb");
1212  ebs.Fields.push_back(path);
1213  ebs.Fields.push_back("cmd");
1214  ebs.Fields.push_back(argStr);
1215  mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol(
1216      ons, bAlloc, CodeViewContainer::Pdb));
1217  mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol(
1218      cs, bAlloc, CodeViewContainer::Pdb));
1219  mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol(
1220      ebs, bAlloc, CodeViewContainer::Pdb));
1221}
1222
1223static void addLinkerModuleCoffGroup(PartialSection *sec,
1224                                     pdb::DbiModuleDescriptorBuilder &mod,
1225                                     OutputSection &os) {
1226  // If there's a section, there's at least one chunk
1227  assert(!sec->chunks.empty());
1228  const Chunk *firstChunk = *sec->chunks.begin();
1229  const Chunk *lastChunk = *sec->chunks.rbegin();
1230
1231  // Emit COFF group
1232  CoffGroupSym cgs(SymbolRecordKind::CoffGroupSym);
1233  cgs.Name = sec->name;
1234  cgs.Segment = os.sectionIndex;
1235  cgs.Offset = firstChunk->getRVA() - os.getRVA();
1236  cgs.Size = lastChunk->getRVA() + lastChunk->getSize() - firstChunk->getRVA();
1237  cgs.Characteristics = sec->characteristics;
1238
1239  // Somehow .idata sections & sections groups in the debug symbol stream have
1240  // the "write" flag set. However the section header for the corresponding
1241  // .idata section doesn't have it.
1242  if (cgs.Name.startswith(".idata"))
1243    cgs.Characteristics |= llvm::COFF::IMAGE_SCN_MEM_WRITE;
1244
1245  mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol(
1246      cgs, bAlloc, CodeViewContainer::Pdb));
1247}
1248
1249static void addLinkerModuleSectionSymbol(pdb::DbiModuleDescriptorBuilder &mod,
1250                                         OutputSection &os) {
1251  SectionSym sym(SymbolRecordKind::SectionSym);
1252  sym.Alignment = 12; // 2^12 = 4KB
1253  sym.Characteristics = os.header.Characteristics;
1254  sym.Length = os.getVirtualSize();
1255  sym.Name = os.name;
1256  sym.Rva = os.getRVA();
1257  sym.SectionNumber = os.sectionIndex;
1258  mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol(
1259      sym, bAlloc, CodeViewContainer::Pdb));
1260
1261  // Skip COFF groups in MinGW because it adds a significant footprint to the
1262  // PDB, due to each function being in its own section
1263  if (config->mingw)
1264    return;
1265
1266  // Output COFF groups for individual chunks of this section.
1267  for (PartialSection *sec : os.contribSections) {
1268    addLinkerModuleCoffGroup(sec, mod, os);
1269  }
1270}
1271
1272// Add all import files as modules to the PDB.
1273void PDBLinker::addImportFilesToPDB(ArrayRef<OutputSection *> outputSections) {
1274  if (ImportFile::instances.empty())
1275    return;
1276
1277  std::map<std::string, llvm::pdb::DbiModuleDescriptorBuilder *> dllToModuleDbi;
1278
1279  for (ImportFile *file : ImportFile::instances) {
1280    if (!file->live)
1281      continue;
1282
1283    if (!file->thunkSym)
1284      continue;
1285
1286    if (!file->thunkLive)
1287        continue;
1288
1289    std::string dll = StringRef(file->dllName).lower();
1290    llvm::pdb::DbiModuleDescriptorBuilder *&mod = dllToModuleDbi[dll];
1291    if (!mod) {
1292      pdb::DbiStreamBuilder &dbiBuilder = builder.getDbiBuilder();
1293      SmallString<128> libPath = file->parentName;
1294      pdbMakeAbsolute(libPath);
1295      sys::path::native(libPath);
1296
1297      // Name modules similar to MSVC's link.exe.
1298      // The first module is the simple dll filename
1299      llvm::pdb::DbiModuleDescriptorBuilder &firstMod =
1300          exitOnErr(dbiBuilder.addModuleInfo(file->dllName));
1301      firstMod.setObjFileName(libPath);
1302      pdb::SectionContrib sc =
1303          createSectionContrib(nullptr, llvm::pdb::kInvalidStreamIndex);
1304      firstMod.setFirstSectionContrib(sc);
1305
1306      // The second module is where the import stream goes.
1307      mod = &exitOnErr(dbiBuilder.addModuleInfo("Import:" + file->dllName));
1308      mod->setObjFileName(libPath);
1309    }
1310
1311    DefinedImportThunk *thunk = cast<DefinedImportThunk>(file->thunkSym);
1312    Chunk *thunkChunk = thunk->getChunk();
1313    OutputSection *thunkOS = thunkChunk->getOutputSection();
1314
1315    ObjNameSym ons(SymbolRecordKind::ObjNameSym);
1316    Compile3Sym cs(SymbolRecordKind::Compile3Sym);
1317    Thunk32Sym ts(SymbolRecordKind::Thunk32Sym);
1318    ScopeEndSym es(SymbolRecordKind::ScopeEndSym);
1319
1320    ons.Name = file->dllName;
1321    ons.Signature = 0;
1322
1323    fillLinkerVerRecord(cs);
1324
1325    ts.Name = thunk->getName();
1326    ts.Parent = 0;
1327    ts.End = 0;
1328    ts.Next = 0;
1329    ts.Thunk = ThunkOrdinal::Standard;
1330    ts.Length = thunkChunk->getSize();
1331    ts.Segment = thunkOS->sectionIndex;
1332    ts.Offset = thunkChunk->getRVA() - thunkOS->getRVA();
1333
1334    mod->addSymbol(codeview::SymbolSerializer::writeOneSymbol(
1335        ons, bAlloc, CodeViewContainer::Pdb));
1336    mod->addSymbol(codeview::SymbolSerializer::writeOneSymbol(
1337        cs, bAlloc, CodeViewContainer::Pdb));
1338
1339    SmallVector<SymbolScope, 4> scopes;
1340    CVSymbol newSym = codeview::SymbolSerializer::writeOneSymbol(
1341        ts, bAlloc, CodeViewContainer::Pdb);
1342    scopeStackOpen(scopes, mod->getNextSymbolOffset(), newSym);
1343
1344    mod->addSymbol(newSym);
1345
1346    newSym = codeview::SymbolSerializer::writeOneSymbol(es, bAlloc,
1347                                                        CodeViewContainer::Pdb);
1348    scopeStackClose(scopes, mod->getNextSymbolOffset(), file);
1349
1350    mod->addSymbol(newSym);
1351
1352    pdb::SectionContrib sc =
1353        createSectionContrib(thunk->getChunk(), mod->getModuleIndex());
1354    mod->setFirstSectionContrib(sc);
1355  }
1356}
1357
1358// Creates a PDB file.
1359void lld::coff::createPDB(SymbolTable *symtab,
1360                          ArrayRef<OutputSection *> outputSections,
1361                          ArrayRef<uint8_t> sectionTable,
1362                          llvm::codeview::DebugInfo *buildId) {
1363  ScopedTimer t1(totalPdbLinkTimer);
1364  PDBLinker pdb(symtab);
1365
1366  pdb.initialize(buildId);
1367  pdb.addObjectsToPDB();
1368  pdb.addImportFilesToPDB(outputSections);
1369  pdb.addSections(outputSections, sectionTable);
1370  pdb.addNatvisFiles();
1371  pdb.addNamedStreams();
1372  pdb.addPublicsToPDB();
1373
1374  ScopedTimer t2(diskCommitTimer);
1375  codeview::GUID guid;
1376  pdb.commit(&guid);
1377  memcpy(&buildId->PDB70.Signature, &guid, 16);
1378
1379  t2.stop();
1380  t1.stop();
1381  pdb.printStats();
1382}
1383
1384void PDBLinker::initialize(llvm::codeview::DebugInfo *buildId) {
1385  exitOnErr(builder.initialize(4096)); // 4096 is blocksize
1386
1387  buildId->Signature.CVSignature = OMF::Signature::PDB70;
1388  // Signature is set to a hash of the PDB contents when the PDB is done.
1389  memset(buildId->PDB70.Signature, 0, 16);
1390  buildId->PDB70.Age = 1;
1391
1392  // Create streams in MSF for predefined streams, namely
1393  // PDB, TPI, DBI and IPI.
1394  for (int i = 0; i < (int)pdb::kSpecialStreamCount; ++i)
1395    exitOnErr(builder.getMsfBuilder().addStream(0));
1396
1397  // Add an Info stream.
1398  auto &infoBuilder = builder.getInfoBuilder();
1399  infoBuilder.setVersion(pdb::PdbRaw_ImplVer::PdbImplVC70);
1400  infoBuilder.setHashPDBContentsToGUID(true);
1401
1402  // Add an empty DBI stream.
1403  pdb::DbiStreamBuilder &dbiBuilder = builder.getDbiBuilder();
1404  dbiBuilder.setAge(buildId->PDB70.Age);
1405  dbiBuilder.setVersionHeader(pdb::PdbDbiV70);
1406  dbiBuilder.setMachineType(config->machine);
1407  // Technically we are not link.exe 14.11, but there are known cases where
1408  // debugging tools on Windows expect Microsoft-specific version numbers or
1409  // they fail to work at all.  Since we know we produce PDBs that are
1410  // compatible with LINK 14.11, we set that version number here.
1411  dbiBuilder.setBuildNumber(14, 11);
1412}
1413
1414void PDBLinker::addSections(ArrayRef<OutputSection *> outputSections,
1415                            ArrayRef<uint8_t> sectionTable) {
1416  // It's not entirely clear what this is, but the * Linker * module uses it.
1417  pdb::DbiStreamBuilder &dbiBuilder = builder.getDbiBuilder();
1418  nativePath = config->pdbPath;
1419  pdbMakeAbsolute(nativePath);
1420  uint32_t pdbFilePathNI = dbiBuilder.addECName(nativePath);
1421  auto &linkerModule = exitOnErr(dbiBuilder.addModuleInfo("* Linker *"));
1422  linkerModule.setPdbFilePathNI(pdbFilePathNI);
1423  addCommonLinkerModuleSymbols(nativePath, linkerModule);
1424
1425  // Add section contributions. They must be ordered by ascending RVA.
1426  for (OutputSection *os : outputSections) {
1427    addLinkerModuleSectionSymbol(linkerModule, *os);
1428    for (Chunk *c : os->chunks) {
1429      pdb::SectionContrib sc =
1430          createSectionContrib(c, linkerModule.getModuleIndex());
1431      builder.getDbiBuilder().addSectionContrib(sc);
1432    }
1433  }
1434
1435  // The * Linker * first section contrib is only used along with /INCREMENTAL,
1436  // to provide trampolines thunks for incremental function patching. Set this
1437  // as "unused" because LLD doesn't support /INCREMENTAL link.
1438  pdb::SectionContrib sc =
1439      createSectionContrib(nullptr, llvm::pdb::kInvalidStreamIndex);
1440  linkerModule.setFirstSectionContrib(sc);
1441
1442  // Add Section Map stream.
1443  ArrayRef<object::coff_section> sections = {
1444      (const object::coff_section *)sectionTable.data(),
1445      sectionTable.size() / sizeof(object::coff_section)};
1446  dbiBuilder.createSectionMap(sections);
1447
1448  // Add COFF section header stream.
1449  exitOnErr(
1450      dbiBuilder.addDbgStream(pdb::DbgHeaderType::SectionHdr, sectionTable));
1451}
1452
1453void PDBLinker::commit(codeview::GUID *guid) {
1454  ExitOnError exitOnErr((config->pdbPath + ": ").str());
1455  // Write to a file.
1456  exitOnErr(builder.commit(config->pdbPath, guid));
1457}
1458
1459static uint32_t getSecrelReloc() {
1460  switch (config->machine) {
1461  case AMD64:
1462    return COFF::IMAGE_REL_AMD64_SECREL;
1463  case I386:
1464    return COFF::IMAGE_REL_I386_SECREL;
1465  case ARMNT:
1466    return COFF::IMAGE_REL_ARM_SECREL;
1467  case ARM64:
1468    return COFF::IMAGE_REL_ARM64_SECREL;
1469  default:
1470    llvm_unreachable("unknown machine type");
1471  }
1472}
1473
1474// Try to find a line table for the given offset Addr into the given chunk C.
1475// If a line table was found, the line table, the string and checksum tables
1476// that are used to interpret the line table, and the offset of Addr in the line
1477// table are stored in the output arguments. Returns whether a line table was
1478// found.
1479static bool findLineTable(const SectionChunk *c, uint32_t addr,
1480                          DebugStringTableSubsectionRef &cvStrTab,
1481                          DebugChecksumsSubsectionRef &checksums,
1482                          DebugLinesSubsectionRef &lines,
1483                          uint32_t &offsetInLinetable) {
1484  ExitOnError exitOnErr;
1485  uint32_t secrelReloc = getSecrelReloc();
1486
1487  for (SectionChunk *dbgC : c->file->getDebugChunks()) {
1488    if (dbgC->getSectionName() != ".debug$S")
1489      continue;
1490
1491    // Build a mapping of SECREL relocations in dbgC that refer to `c`.
1492    DenseMap<uint32_t, uint32_t> secrels;
1493    for (const coff_relocation &r : dbgC->getRelocs()) {
1494      if (r.Type != secrelReloc)
1495        continue;
1496
1497      if (auto *s = dyn_cast_or_null<DefinedRegular>(
1498              c->file->getSymbols()[r.SymbolTableIndex]))
1499        if (s->getChunk() == c)
1500          secrels[r.VirtualAddress] = s->getValue();
1501    }
1502
1503    ArrayRef<uint8_t> contents =
1504        SectionChunk::consumeDebugMagic(dbgC->getContents(), ".debug$S");
1505    DebugSubsectionArray subsections;
1506    BinaryStreamReader reader(contents, support::little);
1507    exitOnErr(reader.readArray(subsections, contents.size()));
1508
1509    for (const DebugSubsectionRecord &ss : subsections) {
1510      switch (ss.kind()) {
1511      case DebugSubsectionKind::StringTable: {
1512        assert(!cvStrTab.valid() &&
1513               "Encountered multiple string table subsections!");
1514        exitOnErr(cvStrTab.initialize(ss.getRecordData()));
1515        break;
1516      }
1517      case DebugSubsectionKind::FileChecksums:
1518        assert(!checksums.valid() &&
1519               "Encountered multiple checksum subsections!");
1520        exitOnErr(checksums.initialize(ss.getRecordData()));
1521        break;
1522      case DebugSubsectionKind::Lines: {
1523        ArrayRef<uint8_t> bytes;
1524        auto ref = ss.getRecordData();
1525        exitOnErr(ref.readLongestContiguousChunk(0, bytes));
1526        size_t offsetInDbgC = bytes.data() - dbgC->getContents().data();
1527
1528        // Check whether this line table refers to C.
1529        auto i = secrels.find(offsetInDbgC);
1530        if (i == secrels.end())
1531          break;
1532
1533        // Check whether this line table covers Addr in C.
1534        DebugLinesSubsectionRef linesTmp;
1535        exitOnErr(linesTmp.initialize(BinaryStreamReader(ref)));
1536        uint32_t offsetInC = i->second + linesTmp.header()->RelocOffset;
1537        if (addr < offsetInC || addr >= offsetInC + linesTmp.header()->CodeSize)
1538          break;
1539
1540        assert(!lines.header() &&
1541               "Encountered multiple line tables for function!");
1542        exitOnErr(lines.initialize(BinaryStreamReader(ref)));
1543        offsetInLinetable = addr - offsetInC;
1544        break;
1545      }
1546      default:
1547        break;
1548      }
1549
1550      if (cvStrTab.valid() && checksums.valid() && lines.header())
1551        return true;
1552    }
1553  }
1554
1555  return false;
1556}
1557
1558// Use CodeView line tables to resolve a file and line number for the given
1559// offset into the given chunk and return them, or None if a line table was
1560// not found.
1561Optional<std::pair<StringRef, uint32_t>>
1562lld::coff::getFileLineCodeView(const SectionChunk *c, uint32_t addr) {
1563  ExitOnError exitOnErr;
1564
1565  DebugStringTableSubsectionRef cvStrTab;
1566  DebugChecksumsSubsectionRef checksums;
1567  DebugLinesSubsectionRef lines;
1568  uint32_t offsetInLinetable;
1569
1570  if (!findLineTable(c, addr, cvStrTab, checksums, lines, offsetInLinetable))
1571    return None;
1572
1573  Optional<uint32_t> nameIndex;
1574  Optional<uint32_t> lineNumber;
1575  for (LineColumnEntry &entry : lines) {
1576    for (const LineNumberEntry &ln : entry.LineNumbers) {
1577      LineInfo li(ln.Flags);
1578      if (ln.Offset > offsetInLinetable) {
1579        if (!nameIndex) {
1580          nameIndex = entry.NameIndex;
1581          lineNumber = li.getStartLine();
1582        }
1583        StringRef filename =
1584            exitOnErr(getFileName(cvStrTab, checksums, *nameIndex));
1585        return std::make_pair(filename, *lineNumber);
1586      }
1587      nameIndex = entry.NameIndex;
1588      lineNumber = li.getStartLine();
1589    }
1590  }
1591  if (!nameIndex)
1592    return None;
1593  StringRef filename = exitOnErr(getFileName(cvStrTab, checksums, *nameIndex));
1594  return std::make_pair(filename, *lineNumber);
1595}
1596