InputFile.cpp revision 360784
1//===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "InputFile.h"
10
11#include "FormatUtil.h"
12#include "LinePrinter.h"
13
14#include "llvm/BinaryFormat/Magic.h"
15#include "llvm/DebugInfo/CodeView/CodeView.h"
16#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
17#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
18#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
19#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
20#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
21#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
22#include "llvm/DebugInfo/PDB/Native/RawError.h"
23#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
24#include "llvm/DebugInfo/PDB/PDB.h"
25#include "llvm/Object/COFF.h"
26#include "llvm/Support/FileSystem.h"
27#include "llvm/Support/FormatVariadic.h"
28
29using namespace llvm;
30using namespace llvm::codeview;
31using namespace llvm::object;
32using namespace llvm::pdb;
33
34InputFile::InputFile() {}
35InputFile::~InputFile() {}
36
37static Expected<ModuleDebugStreamRef>
38getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) {
39  ExitOnError Err("Unexpected error: ");
40
41  auto &Dbi = Err(File.getPDBDbiStream());
42  const auto &Modules = Dbi.modules();
43  if (Index >= Modules.getModuleCount())
44    return make_error<RawError>(raw_error_code::index_out_of_bounds,
45                                "Invalid module index");
46
47  auto Modi = Modules.getModuleDescriptor(Index);
48
49  ModuleName = Modi.getModuleName();
50
51  uint16_t ModiStream = Modi.getModuleStreamIndex();
52  if (ModiStream == kInvalidStreamIndex)
53    return make_error<RawError>(raw_error_code::no_stream,
54                                "Module stream not present");
55
56  auto ModStreamData = File.createIndexedStream(ModiStream);
57
58  ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
59  if (auto EC = ModS.reload())
60    return make_error<RawError>(raw_error_code::corrupt_file,
61                                "Invalid module stream");
62
63  return std::move(ModS);
64}
65
66static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
67                                             StringRef Name,
68                                             BinaryStreamReader &Reader) {
69  if (Expected<StringRef> NameOrErr = Section.getName()) {
70    if (*NameOrErr != Name)
71      return false;
72  } else {
73    consumeError(NameOrErr.takeError());
74    return false;
75  }
76
77  Expected<StringRef> ContentsOrErr = Section.getContents();
78  if (!ContentsOrErr) {
79    consumeError(ContentsOrErr.takeError());
80    return false;
81  }
82
83  Reader = BinaryStreamReader(*ContentsOrErr, support::little);
84  uint32_t Magic;
85  if (Reader.bytesRemaining() < sizeof(uint32_t))
86    return false;
87  cantFail(Reader.readInteger(Magic));
88  if (Magic != COFF::DEBUG_SECTION_MAGIC)
89    return false;
90  return true;
91}
92
93static inline bool isDebugSSection(object::SectionRef Section,
94                                   DebugSubsectionArray &Subsections) {
95  BinaryStreamReader Reader;
96  if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader))
97    return false;
98
99  cantFail(Reader.readArray(Subsections, Reader.bytesRemaining()));
100  return true;
101}
102
103static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) {
104  BinaryStreamReader Reader;
105  if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) &&
106      !isCodeViewDebugSubsection(Section, ".debug$P", Reader))
107    return false;
108  cantFail(Reader.readArray(Types, Reader.bytesRemaining()));
109  return true;
110}
111
112static std::string formatChecksumKind(FileChecksumKind Kind) {
113  switch (Kind) {
114    RETURN_CASE(FileChecksumKind, None, "None");
115    RETURN_CASE(FileChecksumKind, MD5, "MD5");
116    RETURN_CASE(FileChecksumKind, SHA1, "SHA-1");
117    RETURN_CASE(FileChecksumKind, SHA256, "SHA-256");
118  }
119  return formatUnknownEnum(Kind);
120}
121
122template <typename... Args>
123static void formatInternal(LinePrinter &Printer, bool Append, Args &&... args) {
124  if (Append)
125    Printer.format(std::forward<Args>(args)...);
126  else
127    Printer.formatLine(std::forward<Args>(args)...);
128}
129
130SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) {
131  if (!File)
132    return;
133
134  if (File->isPdb())
135    initializeForPdb(GroupIndex);
136  else {
137    Name = ".debug$S";
138    uint32_t I = 0;
139    for (const auto &S : File->obj().sections()) {
140      DebugSubsectionArray SS;
141      if (!isDebugSSection(S, SS))
142        continue;
143
144      if (!SC.hasChecksums() || !SC.hasStrings())
145        SC.initialize(SS);
146
147      if (I == GroupIndex)
148        Subsections = SS;
149
150      if (SC.hasChecksums() && SC.hasStrings())
151        break;
152    }
153    rebuildChecksumMap();
154  }
155}
156
157StringRef SymbolGroup::name() const { return Name; }
158
159void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) {
160  Subsections = SS;
161}
162
163void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); }
164
165void SymbolGroup::initializeForPdb(uint32_t Modi) {
166  assert(File && File->isPdb());
167
168  // PDB always uses the same string table, but each module has its own
169  // checksums.  So we only set the strings if they're not already set.
170  if (!SC.hasStrings()) {
171    auto StringTable = File->pdb().getStringTable();
172    if (StringTable)
173      SC.setStrings(StringTable->getStringTable());
174    else
175      consumeError(StringTable.takeError());
176  }
177
178  SC.resetChecksums();
179  auto MDS = getModuleDebugStream(File->pdb(), Name, Modi);
180  if (!MDS) {
181    consumeError(MDS.takeError());
182    return;
183  }
184
185  DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS));
186  Subsections = DebugStream->getSubsectionsArray();
187  SC.initialize(Subsections);
188  rebuildChecksumMap();
189}
190
191void SymbolGroup::rebuildChecksumMap() {
192  if (!SC.hasChecksums())
193    return;
194
195  for (const auto &Entry : SC.checksums()) {
196    auto S = SC.strings().getString(Entry.FileNameOffset);
197    if (!S)
198      continue;
199    ChecksumsByFile[*S] = Entry;
200  }
201}
202
203const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const {
204  assert(File && File->isPdb() && DebugStream);
205  return *DebugStream;
206}
207
208Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const {
209  return SC.strings().getString(Offset);
210}
211
212void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File,
213                                     bool Append) const {
214  auto FC = ChecksumsByFile.find(File);
215  if (FC == ChecksumsByFile.end()) {
216    formatInternal(Printer, Append, "- (no checksum) {0}", File);
217    return;
218  }
219
220  formatInternal(Printer, Append, "- ({0}: {1}) {2}",
221                 formatChecksumKind(FC->getValue().Kind),
222                 toHex(FC->getValue().Checksum), File);
223}
224
225void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer,
226                                            uint32_t Offset,
227                                            bool Append) const {
228  if (!SC.hasChecksums()) {
229    formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
230    return;
231  }
232
233  auto Iter = SC.checksums().getArray().at(Offset);
234  if (Iter == SC.checksums().getArray().end()) {
235    formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
236    return;
237  }
238
239  uint32_t FO = Iter->FileNameOffset;
240  auto ExpectedFile = getNameFromStringTable(FO);
241  if (!ExpectedFile) {
242    formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
243    consumeError(ExpectedFile.takeError());
244    return;
245  }
246  if (Iter->Kind == FileChecksumKind::None) {
247    formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile);
248  } else {
249    formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile,
250                   formatChecksumKind(Iter->Kind), toHex(Iter->Checksum));
251  }
252}
253
254Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) {
255  InputFile IF;
256  if (!llvm::sys::fs::exists(Path))
257    return make_error<StringError>(formatv("File {0} not found", Path),
258                                   inconvertibleErrorCode());
259
260  file_magic Magic;
261  if (auto EC = identify_magic(Path, Magic))
262    return make_error<StringError>(
263        formatv("Unable to identify file type for file {0}", Path), EC);
264
265  if (Magic == file_magic::coff_object) {
266    Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path);
267    if (!BinaryOrErr)
268      return BinaryOrErr.takeError();
269
270    IF.CoffObject = std::move(*BinaryOrErr);
271    IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary());
272    return std::move(IF);
273  }
274
275  if (Magic == file_magic::pdb) {
276    std::unique_ptr<IPDBSession> Session;
277    if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session))
278      return std::move(Err);
279
280    IF.PdbSession.reset(static_cast<NativeSession *>(Session.release()));
281    IF.PdbOrObj = &IF.PdbSession->getPDBFile();
282
283    return std::move(IF);
284  }
285
286  if (!AllowUnknownFile)
287    return make_error<StringError>(
288        formatv("File {0} is not a supported file type", Path),
289        inconvertibleErrorCode());
290
291  auto Result = MemoryBuffer::getFile(Path, -1LL, false);
292  if (!Result)
293    return make_error<StringError>(
294        formatv("File {0} could not be opened", Path), Result.getError());
295
296  IF.UnknownFile = std::move(*Result);
297  IF.PdbOrObj = IF.UnknownFile.get();
298  return std::move(IF);
299}
300
301PDBFile &InputFile::pdb() {
302  assert(isPdb());
303  return *PdbOrObj.get<PDBFile *>();
304}
305
306const PDBFile &InputFile::pdb() const {
307  assert(isPdb());
308  return *PdbOrObj.get<PDBFile *>();
309}
310
311object::COFFObjectFile &InputFile::obj() {
312  assert(isObj());
313  return *PdbOrObj.get<object::COFFObjectFile *>();
314}
315
316const object::COFFObjectFile &InputFile::obj() const {
317  assert(isObj());
318  return *PdbOrObj.get<object::COFFObjectFile *>();
319}
320
321MemoryBuffer &InputFile::unknown() {
322  assert(isUnknown());
323  return *PdbOrObj.get<MemoryBuffer *>();
324}
325
326const MemoryBuffer &InputFile::unknown() const {
327  assert(isUnknown());
328  return *PdbOrObj.get<MemoryBuffer *>();
329}
330
331StringRef InputFile::getFilePath() const {
332  if (isPdb())
333    return pdb().getFilePath();
334  if (isObj())
335    return obj().getFileName();
336  assert(isUnknown());
337  return unknown().getBufferIdentifier();
338}
339
340bool InputFile::hasTypes() const {
341  if (isPdb())
342    return pdb().hasPDBTpiStream();
343
344  for (const auto &Section : obj().sections()) {
345    CVTypeArray Types;
346    if (isDebugTSection(Section, Types))
347      return true;
348  }
349  return false;
350}
351
352bool InputFile::hasIds() const {
353  if (isObj())
354    return false;
355  return pdb().hasPDBIpiStream();
356}
357
358bool InputFile::isPdb() const { return PdbOrObj.is<PDBFile *>(); }
359
360bool InputFile::isObj() const {
361  return PdbOrObj.is<object::COFFObjectFile *>();
362}
363
364bool InputFile::isUnknown() const { return PdbOrObj.is<MemoryBuffer *>(); }
365
366codeview::LazyRandomTypeCollection &
367InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
368  if (Types && Kind == kTypes)
369    return *Types;
370  if (Ids && Kind == kIds)
371    return *Ids;
372
373  if (Kind == kIds) {
374    assert(isPdb() && pdb().hasPDBIpiStream());
375  }
376
377  // If the collection was already initialized, we should have just returned it
378  // in step 1.
379  if (isPdb()) {
380    TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types;
381    auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream()
382                                           : pdb().getPDBTpiStream());
383
384    auto &Array = Stream.typeArray();
385    uint32_t Count = Stream.getNumTypeRecords();
386    auto Offsets = Stream.getTypeIndexOffsets();
387    Collection =
388        std::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);
389    return *Collection;
390  }
391
392  assert(isObj());
393  assert(Kind == kTypes);
394  assert(!Types);
395
396  for (const auto &Section : obj().sections()) {
397    CVTypeArray Records;
398    if (!isDebugTSection(Section, Records))
399      continue;
400
401    Types = std::make_unique<LazyRandomTypeCollection>(Records, 100);
402    return *Types;
403  }
404
405  Types = std::make_unique<LazyRandomTypeCollection>(100);
406  return *Types;
407}
408
409codeview::LazyRandomTypeCollection &InputFile::types() {
410  return getOrCreateTypeCollection(kTypes);
411}
412
413codeview::LazyRandomTypeCollection &InputFile::ids() {
414  // Object files have only one type stream that contains both types and ids.
415  // Similarly, some PDBs don't contain an IPI stream, and for those both types
416  // and IDs are in the same stream.
417  if (isObj() || !pdb().hasPDBIpiStream())
418    return types();
419
420  return getOrCreateTypeCollection(kIds);
421}
422
423iterator_range<SymbolGroupIterator> InputFile::symbol_groups() {
424  return make_range<SymbolGroupIterator>(symbol_groups_begin(),
425                                         symbol_groups_end());
426}
427
428SymbolGroupIterator InputFile::symbol_groups_begin() {
429  return SymbolGroupIterator(*this);
430}
431
432SymbolGroupIterator InputFile::symbol_groups_end() {
433  return SymbolGroupIterator();
434}
435
436SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}
437
438SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) {
439  if (File.isObj()) {
440    SectionIter = File.obj().section_begin();
441    scanToNextDebugS();
442  }
443}
444
445bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const {
446  bool E = isEnd();
447  bool RE = R.isEnd();
448  if (E || RE)
449    return E == RE;
450
451  if (Value.File != R.Value.File)
452    return false;
453  return Index == R.Index;
454}
455
456const SymbolGroup &SymbolGroupIterator::operator*() const {
457  assert(!isEnd());
458  return Value;
459}
460SymbolGroup &SymbolGroupIterator::operator*() {
461  assert(!isEnd());
462  return Value;
463}
464
465SymbolGroupIterator &SymbolGroupIterator::operator++() {
466  assert(Value.File && !isEnd());
467  ++Index;
468  if (isEnd())
469    return *this;
470
471  if (Value.File->isPdb()) {
472    Value.updatePdbModi(Index);
473    return *this;
474  }
475
476  scanToNextDebugS();
477  return *this;
478}
479
480void SymbolGroupIterator::scanToNextDebugS() {
481  assert(SectionIter.hasValue());
482  auto End = Value.File->obj().section_end();
483  auto &Iter = *SectionIter;
484  assert(!isEnd());
485
486  while (++Iter != End) {
487    DebugSubsectionArray SS;
488    SectionRef SR = *Iter;
489    if (!isDebugSSection(SR, SS))
490      continue;
491
492    Value.updateDebugS(SS);
493    return;
494  }
495}
496
497bool SymbolGroupIterator::isEnd() const {
498  if (!Value.File)
499    return true;
500  if (Value.File->isPdb()) {
501    auto &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());
502    uint32_t Count = Dbi.modules().getModuleCount();
503    assert(Index <= Count);
504    return Index == Count;
505  }
506
507  assert(SectionIter.hasValue());
508  return *SectionIter == Value.File->obj().section_end();
509}
510