1327952Sdim//===- GlobalsStream.cpp - PDB Index of Symbols by Name ---------*- C++ -*-===//
2317017Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6317017Sdim//
7317017Sdim//===----------------------------------------------------------------------===//
8327952Sdim//
// The on-disk structures used in this file are based on the reference
10327952Sdim// implementation which is available at
11327952Sdim// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h
12327952Sdim//
13327952Sdim// When you are reading the reference source code, you'd find the
14327952Sdim// information below useful.
15327952Sdim//
16327952Sdim//  - ppdb1->m_fMinimalDbgInfo seems to be always true.
17327952Sdim//  - SMALLBUCKETS macro is defined.
18327952Sdim//
19327952Sdim//===----------------------------------------------------------------------===//
20317017Sdim
21317017Sdim#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
22344779Sdim
23344779Sdim#include "llvm/DebugInfo/CodeView/RecordName.h"
24344779Sdim#include "llvm/DebugInfo/PDB/Native/Hash.h"
25327952Sdim#include "llvm/DebugInfo/PDB/Native/RawError.h"
26344779Sdim#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
27317017Sdim#include "llvm/Support/BinaryStreamReader.h"
28317017Sdim#include "llvm/Support/Error.h"
29317017Sdim#include <algorithm>
30317017Sdim
31317017Sdimusing namespace llvm;
32317017Sdimusing namespace llvm::msf;
33317017Sdimusing namespace llvm::pdb;
34317017Sdim
35317017SdimGlobalsStream::GlobalsStream(std::unique_ptr<MappedBlockStream> Stream)
36317017Sdim    : Stream(std::move(Stream)) {}
37317017Sdim
// Defaulted destructor, defined out of line in this translation unit.
// NOTE(review): presumably kept here so the std::unique_ptr<MappedBlockStream>
// member is destroyed where MappedBlockStream is a complete type -- confirm
// against the header.
GlobalsStream::~GlobalsStream() = default;
39317017Sdim
40317017SdimError GlobalsStream::reload() {
41317017Sdim  BinaryStreamReader Reader(*Stream);
42327952Sdim  if (auto E = GlobalsTable.read(Reader))
43327952Sdim    return E;
44327952Sdim  return Error::success();
45327952Sdim}
46317017Sdim
47344779Sdimstd::vector<std::pair<uint32_t, codeview::CVSymbol>>
48344779SdimGlobalsStream::findRecordsByName(StringRef Name,
49344779Sdim                                 const SymbolStream &Symbols) const {
50344779Sdim  std::vector<std::pair<uint32_t, codeview::CVSymbol>> Result;
51344779Sdim
52344779Sdim  // Hash the name to figure out which bucket this goes into.
53344779Sdim  size_t ExpandedBucketIndex = hashStringV1(Name) % IPHR_HASH;
54344779Sdim  int32_t CompressedBucketIndex = GlobalsTable.BucketMap[ExpandedBucketIndex];
55344779Sdim  if (CompressedBucketIndex == -1)
56344779Sdim    return Result;
57344779Sdim
58344779Sdim  uint32_t LastBucketIndex = GlobalsTable.HashBuckets.size() - 1;
59344779Sdim  uint32_t StartRecordIndex =
60344779Sdim      GlobalsTable.HashBuckets[CompressedBucketIndex] / 12;
61344779Sdim  uint32_t EndRecordIndex = 0;
62344779Sdim  if (LLVM_LIKELY(uint32_t(CompressedBucketIndex) < LastBucketIndex)) {
63344779Sdim    EndRecordIndex = GlobalsTable.HashBuckets[CompressedBucketIndex + 1];
64344779Sdim  } else {
65344779Sdim    // If this is the last bucket, it consists of all hash records until the end
66344779Sdim    // of the HashRecords array.
67344779Sdim    EndRecordIndex = GlobalsTable.HashRecords.size() * 12;
68344779Sdim  }
69344779Sdim
70344779Sdim  EndRecordIndex /= 12;
71344779Sdim
72344779Sdim  assert(EndRecordIndex <= GlobalsTable.HashRecords.size());
73344779Sdim  while (StartRecordIndex < EndRecordIndex) {
74344779Sdim    PSHashRecord PSH = GlobalsTable.HashRecords[StartRecordIndex];
75344779Sdim    uint32_t Off = PSH.Off - 1;
76344779Sdim    codeview::CVSymbol Record = Symbols.readRecord(Off);
77344779Sdim    if (codeview::getSymbolName(Record) == Name)
78344779Sdim      Result.push_back(std::make_pair(Off, std::move(Record)));
79344779Sdim    ++StartRecordIndex;
80344779Sdim  }
81344779Sdim  return Result;
82344779Sdim}
83344779Sdim
84327952Sdimstatic Error checkHashHdrVersion(const GSIHashHeader *HashHdr) {
85327952Sdim  if (HashHdr->VerHdr != GSIHashHeader::HdrVersion)
86327952Sdim    return make_error<RawError>(
87327952Sdim        raw_error_code::feature_unsupported,
88327952Sdim        "Encountered unsupported globals stream version.");
89317017Sdim
90327952Sdim  return Error::success();
91327952Sdim}
92327952Sdim
93327952Sdimstatic Error readGSIHashHeader(const GSIHashHeader *&HashHdr,
94327952Sdim                               BinaryStreamReader &Reader) {
95327952Sdim  if (Reader.readObject(HashHdr))
96327952Sdim    return make_error<RawError>(raw_error_code::corrupt_file,
97327952Sdim                                "Stream does not contain a GSIHashHeader.");
98327952Sdim
99327952Sdim  if (HashHdr->VerSignature != GSIHashHeader::HdrSignature)
100327952Sdim    return make_error<RawError>(
101327952Sdim        raw_error_code::feature_unsupported,
102327952Sdim        "GSIHashHeader signature (0xffffffff) not found.");
103327952Sdim
104327952Sdim  return Error::success();
105327952Sdim}
106327952Sdim
107327952Sdimstatic Error readGSIHashRecords(FixedStreamArray<PSHashRecord> &HashRecords,
108327952Sdim                                const GSIHashHeader *HashHdr,
109327952Sdim                                BinaryStreamReader &Reader) {
110327952Sdim  if (auto EC = checkHashHdrVersion(HashHdr))
111317017Sdim    return EC;
112317017Sdim
113327952Sdim  // HashHdr->HrSize specifies the number of bytes of PSHashRecords we have.
114327952Sdim  // Verify that we can read them all.
115327952Sdim  if (HashHdr->HrSize % sizeof(PSHashRecord))
116327952Sdim    return make_error<RawError>(raw_error_code::corrupt_file,
117327952Sdim                                "Invalid HR array size.");
118327952Sdim  uint32_t NumHashRecords = HashHdr->HrSize / sizeof(PSHashRecord);
119327952Sdim  if (auto EC = Reader.readArray(HashRecords, NumHashRecords))
120327952Sdim    return joinErrors(std::move(EC),
121327952Sdim                      make_error<RawError>(raw_error_code::corrupt_file,
122327952Sdim                                           "Error reading hash records."));
123327952Sdim
124327952Sdim  return Error::success();
125327952Sdim}
126327952Sdim
127327952Sdimstatic Error
128327952SdimreadGSIHashBuckets(FixedStreamArray<support::ulittle32_t> &HashBuckets,
129344779Sdim                   FixedStreamArray<support::ulittle32_t> &HashBitmap,
130344779Sdim                   const GSIHashHeader *HashHdr,
131344779Sdim                   MutableArrayRef<int32_t> BucketMap,
132327952Sdim                   BinaryStreamReader &Reader) {
133327952Sdim  if (auto EC = checkHashHdrVersion(HashHdr))
134317017Sdim    return EC;
135317017Sdim
136327952Sdim  // Before the actual hash buckets, there is a bitmap of length determined by
137327952Sdim  // IPHR_HASH.
138327952Sdim  size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32);
139344779Sdim  uint32_t NumBitmapEntries = BitmapSizeInBits / 32;
140344779Sdim  if (auto EC = Reader.readArray(HashBitmap, NumBitmapEntries))
141327952Sdim    return joinErrors(std::move(EC),
142327952Sdim                      make_error<RawError>(raw_error_code::corrupt_file,
143327952Sdim                                           "Could not read a bitmap."));
144344779Sdim  uint32_t NumBuckets1 = 0;
145344779Sdim  uint32_t CompressedBucketIdx = 0;
146344779Sdim  for (uint32_t I = 0; I <= IPHR_HASH; ++I) {
147344779Sdim    uint8_t WordIdx = I / 32;
148344779Sdim    uint8_t BitIdx = I % 32;
149344779Sdim    bool IsSet = HashBitmap[WordIdx] & (1U << BitIdx);
150344779Sdim    if (IsSet) {
151344779Sdim      ++NumBuckets1;
152344779Sdim      BucketMap[I] = CompressedBucketIdx++;
153344779Sdim    } else {
154344779Sdim      BucketMap[I] = -1;
155344779Sdim    }
156344779Sdim  }
157344779Sdim
158327952Sdim  uint32_t NumBuckets = 0;
159344779Sdim  for (uint32_t B : HashBitmap)
160327952Sdim    NumBuckets += countPopulation(B);
161327952Sdim
162327952Sdim  // Hash buckets follow.
163327952Sdim  if (auto EC = Reader.readArray(HashBuckets, NumBuckets))
164327952Sdim    return joinErrors(std::move(EC),
165327952Sdim                      make_error<RawError>(raw_error_code::corrupt_file,
166327952Sdim                                           "Hash buckets corrupted."));
167327952Sdim
168317017Sdim  return Error::success();
169317017Sdim}
170317017Sdim
171327952SdimError GSIHashTable::read(BinaryStreamReader &Reader) {
172327952Sdim  if (auto EC = readGSIHashHeader(HashHdr, Reader))
173327952Sdim    return EC;
174327952Sdim  if (auto EC = readGSIHashRecords(HashRecords, HashHdr, Reader))
175327952Sdim    return EC;
176327952Sdim  if (HashHdr->HrSize > 0)
177344779Sdim    if (auto EC = readGSIHashBuckets(HashBuckets, HashBitmap, HashHdr,
178344779Sdim                                     BucketMap, Reader))
179327952Sdim      return EC;
180327952Sdim  return Error::success();
181327952Sdim}
182