1//===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
10#include "llvm/ADT/ArrayRef.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/DebugInfo/MSF/MSFCommon.h"
13#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
14#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
15#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
16#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
17#include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
18#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
19#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
20#include "llvm/DebugInfo/PDB/Native/RawError.h"
21#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
22#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
23#include "llvm/Support/BinaryStream.h"
24#include "llvm/Support/BinaryStreamArray.h"
25#include "llvm/Support/BinaryStreamReader.h"
26#include "llvm/Support/Endian.h"
27#include "llvm/Support/Error.h"
28#include "llvm/Support/Path.h"
29#include <algorithm>
30#include <cassert>
31#include <cstdint>
32
33using namespace llvm;
34using namespace llvm::codeview;
35using namespace llvm::msf;
36using namespace llvm::pdb;
37
38namespace {
39typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
40} // end anonymous namespace
41
42PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer,
43                 BumpPtrAllocator &Allocator)
44    : FilePath(std::string(Path)), Allocator(Allocator),
45      Buffer(std::move(PdbFileBuffer)) {}
46
47PDBFile::~PDBFile() = default;
48
49StringRef PDBFile::getFilePath() const { return FilePath; }
50
51StringRef PDBFile::getFileDirectory() const {
52  return sys::path::parent_path(FilePath);
53}
54
55uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }
56
57uint32_t PDBFile::getFreeBlockMapBlock() const {
58  return ContainerLayout.SB->FreeBlockMapBlock;
59}
60
61uint32_t PDBFile::getBlockCount() const {
62  return ContainerLayout.SB->NumBlocks;
63}
64
65uint32_t PDBFile::getNumDirectoryBytes() const {
66  return ContainerLayout.SB->NumDirectoryBytes;
67}
68
69uint32_t PDBFile::getBlockMapIndex() const {
70  return ContainerLayout.SB->BlockMapAddr;
71}
72
73uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; }
74
75uint32_t PDBFile::getNumDirectoryBlocks() const {
76  return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes,
77                            ContainerLayout.SB->BlockSize);
78}
79
80uint64_t PDBFile::getBlockMapOffset() const {
81  return (uint64_t)ContainerLayout.SB->BlockMapAddr *
82         ContainerLayout.SB->BlockSize;
83}
84
85uint32_t PDBFile::getNumStreams() const {
86  return ContainerLayout.StreamSizes.size();
87}
88
89uint32_t PDBFile::getMaxStreamSize() const {
90  return *std::max_element(ContainerLayout.StreamSizes.begin(),
91                           ContainerLayout.StreamSizes.end());
92}
93
94uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const {
95  return ContainerLayout.StreamSizes[StreamIndex];
96}
97
98ArrayRef<support::ulittle32_t>
99PDBFile::getStreamBlockList(uint32_t StreamIndex) const {
100  return ContainerLayout.StreamMap[StreamIndex];
101}
102
103uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); }
104
105Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex,
106                                                  uint32_t NumBytes) const {
107  uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize());
108
109  ArrayRef<uint8_t> Result;
110  if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result))
111    return std::move(EC);
112  return Result;
113}
114
115Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset,
116                            ArrayRef<uint8_t> Data) const {
117  return make_error<RawError>(raw_error_code::not_writable,
118                              "PDBFile is immutable");
119}
120
121Error PDBFile::parseFileHeaders() {
122  BinaryStreamReader Reader(*Buffer);
123
124  // Initialize SB.
125  const msf::SuperBlock *SB = nullptr;
126  if (auto EC = Reader.readObject(SB)) {
127    consumeError(std::move(EC));
128    return make_error<RawError>(raw_error_code::corrupt_file,
129                                "MSF superblock is missing");
130  }
131
132  if (auto EC = msf::validateSuperBlock(*SB))
133    return EC;
134
135  if (Buffer->getLength() % SB->BlockSize != 0)
136    return make_error<RawError>(raw_error_code::corrupt_file,
137                                "File size is not a multiple of block size");
138  ContainerLayout.SB = SB;
139
140  // Initialize Free Page Map.
141  ContainerLayout.FreePageMap.resize(SB->NumBlocks);
142  // The Fpm exists either at block 1 or block 2 of the MSF.  However, this
143  // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and
144  // thusly an equal number of total blocks in the file.  For a block size
145  // of 4KiB (very common), this would yield 32KiB total blocks in file, for a
146  // maximum file size of 32KiB * 4KiB = 128MiB.  Obviously this won't do, so
147  // the Fpm is split across the file at `getBlockSize()` intervals.  As a
148  // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
149  // for any non-negative integer k is an Fpm block.  In theory, we only really
150  // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
151  // current versions of the MSF format already expect the Fpm to be arranged
152  // at getBlockSize() intervals, so we have to be compatible.
153  // See the function fpmPn() for more information:
154  // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
155  auto FpmStream =
156      MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator);
157  BinaryStreamReader FpmReader(*FpmStream);
158  ArrayRef<uint8_t> FpmBytes;
159  if (auto EC = FpmReader.readBytes(FpmBytes, FpmReader.bytesRemaining()))
160    return EC;
161  uint32_t BlocksRemaining = getBlockCount();
162  uint32_t BI = 0;
163  for (auto Byte : FpmBytes) {
164    uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U);
165    for (uint32_t I = 0; I < BlocksThisByte; ++I) {
166      if (Byte & (1 << I))
167        ContainerLayout.FreePageMap[BI] = true;
168      --BlocksRemaining;
169      ++BI;
170    }
171  }
172
173  Reader.setOffset(getBlockMapOffset());
174  if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,
175                                 getNumDirectoryBlocks()))
176    return EC;
177
178  return Error::success();
179}
180
181Error PDBFile::parseStreamData() {
182  assert(ContainerLayout.SB);
183  if (DirectoryStream)
184    return Error::success();
185
186  uint32_t NumStreams = 0;
187
188  // Normally you can't use a MappedBlockStream without having fully parsed the
189  // PDB file, because it accesses the directory and various other things, which
190  // is exactly what we are attempting to parse.  By specifying a custom
191  // subclass of IPDBStreamData which only accesses the fields that have already
192  // been parsed, we can avoid this and reuse MappedBlockStream.
193  auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer,
194                                                     Allocator);
195  BinaryStreamReader Reader(*DS);
196  if (auto EC = Reader.readInteger(NumStreams))
197    return EC;
198
199  if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams))
200    return EC;
201  for (uint32_t I = 0; I < NumStreams; ++I) {
202    uint32_t StreamSize = getStreamByteSize(I);
203    // FIXME: What does StreamSize ~0U mean?
204    uint64_t NumExpectedStreamBlocks =
205        StreamSize == UINT32_MAX
206            ? 0
207            : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize);
208
209    // For convenience, we store the block array contiguously.  This is because
210    // if someone calls setStreamMap(), it is more convenient to be able to call
211    // it with an ArrayRef instead of setting up a StreamRef.  Since the
212    // DirectoryStream is cached in the class and thus lives for the life of the
213    // class, we can be guaranteed that readArray() will return a stable
214    // reference, even if it has to allocate from its internal pool.
215    ArrayRef<support::ulittle32_t> Blocks;
216    if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks))
217      return EC;
218    for (uint32_t Block : Blocks) {
219      uint64_t BlockEndOffset =
220          (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize;
221      if (BlockEndOffset > getFileSize())
222        return make_error<RawError>(raw_error_code::corrupt_file,
223                                    "Stream block map is corrupt.");
224    }
225    ContainerLayout.StreamMap.push_back(Blocks);
226  }
227
228  // We should have read exactly SB->NumDirectoryBytes bytes.
229  assert(Reader.bytesRemaining() == 0);
230  DirectoryStream = std::move(DS);
231  return Error::success();
232}
233
234ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
235  return ContainerLayout.DirectoryBlocks;
236}
237
238std::unique_ptr<MappedBlockStream>
239PDBFile::createIndexedStream(uint16_t SN) const {
240  if (SN == kInvalidStreamIndex)
241    return nullptr;
242  return MappedBlockStream::createIndexedStream(ContainerLayout, *Buffer, SN,
243                                                Allocator);
244}
245
246MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const {
247  MSFStreamLayout Result;
248  auto Blocks = getStreamBlockList(StreamIdx);
249  Result.Blocks.assign(Blocks.begin(), Blocks.end());
250  Result.Length = getStreamByteSize(StreamIdx);
251  return Result;
252}
253
254msf::MSFStreamLayout PDBFile::getFpmStreamLayout() const {
255  return msf::getFpmStreamLayout(ContainerLayout);
256}
257
258Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
259  if (!Globals) {
260    auto DbiS = getPDBDbiStream();
261    if (!DbiS)
262      return DbiS.takeError();
263
264    auto GlobalS =
265        safelyCreateIndexedStream(DbiS->getGlobalSymbolStreamIndex());
266    if (!GlobalS)
267      return GlobalS.takeError();
268    auto TempGlobals = std::make_unique<GlobalsStream>(std::move(*GlobalS));
269    if (auto EC = TempGlobals->reload())
270      return std::move(EC);
271    Globals = std::move(TempGlobals);
272  }
273  return *Globals;
274}
275
276Expected<InfoStream &> PDBFile::getPDBInfoStream() {
277  if (!Info) {
278    auto InfoS = safelyCreateIndexedStream(StreamPDB);
279    if (!InfoS)
280      return InfoS.takeError();
281    auto TempInfo = std::make_unique<InfoStream>(std::move(*InfoS));
282    if (auto EC = TempInfo->reload())
283      return std::move(EC);
284    Info = std::move(TempInfo);
285  }
286  return *Info;
287}
288
289Expected<DbiStream &> PDBFile::getPDBDbiStream() {
290  if (!Dbi) {
291    auto DbiS = safelyCreateIndexedStream(StreamDBI);
292    if (!DbiS)
293      return DbiS.takeError();
294    auto TempDbi = std::make_unique<DbiStream>(std::move(*DbiS));
295    if (auto EC = TempDbi->reload(this))
296      return std::move(EC);
297    Dbi = std::move(TempDbi);
298  }
299  return *Dbi;
300}
301
302Expected<TpiStream &> PDBFile::getPDBTpiStream() {
303  if (!Tpi) {
304    auto TpiS = safelyCreateIndexedStream(StreamTPI);
305    if (!TpiS)
306      return TpiS.takeError();
307    auto TempTpi = std::make_unique<TpiStream>(*this, std::move(*TpiS));
308    if (auto EC = TempTpi->reload())
309      return std::move(EC);
310    Tpi = std::move(TempTpi);
311  }
312  return *Tpi;
313}
314
315Expected<TpiStream &> PDBFile::getPDBIpiStream() {
316  if (!Ipi) {
317    if (!hasPDBIpiStream())
318      return make_error<RawError>(raw_error_code::no_stream);
319
320    auto IpiS = safelyCreateIndexedStream(StreamIPI);
321    if (!IpiS)
322      return IpiS.takeError();
323    auto TempIpi = std::make_unique<TpiStream>(*this, std::move(*IpiS));
324    if (auto EC = TempIpi->reload())
325      return std::move(EC);
326    Ipi = std::move(TempIpi);
327  }
328  return *Ipi;
329}
330
331Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
332  if (!Publics) {
333    auto DbiS = getPDBDbiStream();
334    if (!DbiS)
335      return DbiS.takeError();
336
337    auto PublicS =
338        safelyCreateIndexedStream(DbiS->getPublicSymbolStreamIndex());
339    if (!PublicS)
340      return PublicS.takeError();
341    auto TempPublics = std::make_unique<PublicsStream>(std::move(*PublicS));
342    if (auto EC = TempPublics->reload())
343      return std::move(EC);
344    Publics = std::move(TempPublics);
345  }
346  return *Publics;
347}
348
349Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
350  if (!Symbols) {
351    auto DbiS = getPDBDbiStream();
352    if (!DbiS)
353      return DbiS.takeError();
354
355    uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
356    auto SymbolS = safelyCreateIndexedStream(SymbolStreamNum);
357    if (!SymbolS)
358      return SymbolS.takeError();
359
360    auto TempSymbols = std::make_unique<SymbolStream>(std::move(*SymbolS));
361    if (auto EC = TempSymbols->reload())
362      return std::move(EC);
363    Symbols = std::move(TempSymbols);
364  }
365  return *Symbols;
366}
367
368Expected<PDBStringTable &> PDBFile::getStringTable() {
369  if (!Strings) {
370    auto NS = safelyCreateNamedStream("/names");
371    if (!NS)
372      return NS.takeError();
373
374    auto N = std::make_unique<PDBStringTable>();
375    BinaryStreamReader Reader(**NS);
376    if (auto EC = N->reload(Reader))
377      return std::move(EC);
378    assert(Reader.bytesRemaining() == 0);
379    StringTableStream = std::move(*NS);
380    Strings = std::move(N);
381  }
382  return *Strings;
383}
384
385Expected<InjectedSourceStream &> PDBFile::getInjectedSourceStream() {
386  if (!InjectedSources) {
387    auto IJS = safelyCreateNamedStream("/src/headerblock");
388    if (!IJS)
389      return IJS.takeError();
390
391    auto Strings = getStringTable();
392    if (!Strings)
393      return Strings.takeError();
394
395    auto IJ = std::make_unique<InjectedSourceStream>(std::move(*IJS));
396    if (auto EC = IJ->reload(*Strings))
397      return std::move(EC);
398    InjectedSources = std::move(IJ);
399  }
400  return *InjectedSources;
401}
402
403uint32_t PDBFile::getPointerSize() {
404  auto DbiS = getPDBDbiStream();
405  if (!DbiS)
406    return 0;
407  PDB_Machine Machine = DbiS->getMachineType();
408  if (Machine == PDB_Machine::Amd64)
409    return 8;
410  return 4;
411}
412
413bool PDBFile::hasPDBDbiStream() const {
414  return StreamDBI < getNumStreams() && getStreamByteSize(StreamDBI) > 0;
415}
416
417bool PDBFile::hasPDBGlobalsStream() {
418  auto DbiS = getPDBDbiStream();
419  if (!DbiS) {
420    consumeError(DbiS.takeError());
421    return false;
422  }
423
424  return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
425}
426
427bool PDBFile::hasPDBInfoStream() const { return StreamPDB < getNumStreams(); }
428
429bool PDBFile::hasPDBIpiStream() const {
430  if (!hasPDBInfoStream())
431    return false;
432
433  if (StreamIPI >= getNumStreams())
434    return false;
435
436  auto &InfoStream = cantFail(const_cast<PDBFile *>(this)->getPDBInfoStream());
437  return InfoStream.containsIdStream();
438}
439
440bool PDBFile::hasPDBPublicsStream() {
441  auto DbiS = getPDBDbiStream();
442  if (!DbiS) {
443    consumeError(DbiS.takeError());
444    return false;
445  }
446  return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
447}
448
449bool PDBFile::hasPDBSymbolStream() {
450  auto DbiS = getPDBDbiStream();
451  if (!DbiS)
452    return false;
453  return DbiS->getSymRecordStreamIndex() < getNumStreams();
454}
455
456bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); }
457
458bool PDBFile::hasPDBStringTable() {
459  auto IS = getPDBInfoStream();
460  if (!IS)
461    return false;
462  Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
463  if (!ExpectedNSI) {
464    consumeError(ExpectedNSI.takeError());
465    return false;
466  }
467  assert(*ExpectedNSI < getNumStreams());
468  return true;
469}
470
471bool PDBFile::hasPDBInjectedSourceStream() {
472  auto IS = getPDBInfoStream();
473  if (!IS)
474    return false;
475  Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/src/headerblock");
476  if (!ExpectedNSI) {
477    consumeError(ExpectedNSI.takeError());
478    return false;
479  }
480  assert(*ExpectedNSI < getNumStreams());
481  return true;
482}
483
484/// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
485/// stream with that index actually exists.  If it does not, the return value
486/// will have an MSFError with code msf_error_code::no_stream.  Else, the return
487/// value will contain the stream returned by createIndexedStream().
488Expected<std::unique_ptr<MappedBlockStream>>
489PDBFile::safelyCreateIndexedStream(uint32_t StreamIndex) const {
490  if (StreamIndex >= getNumStreams())
491    // This rejects kInvalidStreamIndex with an error as well.
492    return make_error<RawError>(raw_error_code::no_stream);
493  return createIndexedStream(StreamIndex);
494}
495
496Expected<std::unique_ptr<MappedBlockStream>>
497PDBFile::safelyCreateNamedStream(StringRef Name) {
498  auto IS = getPDBInfoStream();
499  if (!IS)
500    return IS.takeError();
501
502  Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex(Name);
503  if (!ExpectedNSI)
504    return ExpectedNSI.takeError();
505  uint32_t NameStreamIndex = *ExpectedNSI;
506
507  return safelyCreateIndexedStream(NameStreamIndex);
508}
509