1//===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
10
11#include "llvm/ADT/ArrayRef.h"
12#include "llvm/DebugInfo/PDB/Native/Hash.h"
13#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
14#include "llvm/Support/BinaryStreamWriter.h"
15#include "llvm/Support/Endian.h"
16
17#include <map>
18
19using namespace llvm;
20using namespace llvm::msf;
21using namespace llvm::support;
22using namespace llvm::support::endian;
23using namespace llvm::pdb;
24
25StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table)
26    : Table(&Table) {}
27
28uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const {
29  // The reference implementation doesn't include code for /src/headerblock
30  // handling, but it can only read natvis entries lld's PDB files if
31  // this hash function truncates the hash to 16 bit.
32  // PDB/include/misc.h in the reference implementation has a hashSz() function
33  // that returns an unsigned short, that seems what's being used for
34  // /src/headerblock.
35  return static_cast<uint16_t>(Table->getIdForString(S));
36}
37
38StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const {
39  return Table->getStringForId(Offset);
40}
41
42uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) {
43  return Table->insert(S);
44}
45
46uint32_t PDBStringTableBuilder::insert(StringRef S) {
47  return Strings.insert(S);
48}
49
50uint32_t PDBStringTableBuilder::getIdForString(StringRef S) const {
51  return Strings.getIdForString(S);
52}
53
54StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const {
55  return Strings.getStringForId(Id);
56}
57
58static uint32_t computeBucketCount(uint32_t NumStrings) {
59  // This is a precomputed list of Buckets given the specified number of
60  // strings.  Matching the reference algorithm exactly is not strictly
61  // necessary for correctness, but it helps when comparing LLD's PDBs with
62  // Microsoft's PDBs so as to eliminate superfluous differences.
63  // The reference implementation does (in nmt.h, NMT::grow()):
64  //   unsigned StringCount = 0;
65  //   unsigned BucketCount = 1;
66  //   fn insert() {
67  //     ++StringCount;
68  //     if (BucketCount * 3 / 4 < StringCount)
69  //       BucketCount = BucketCount * 3 / 2 + 1;
70  //   }
71  // This list contains all StringCount, BucketCount pairs where BucketCount was
72  // just incremented.  It ends before the first BucketCount entry where
73  // BucketCount * 3 would overflow a 32-bit unsigned int.
74  static std::map<uint32_t, uint32_t> StringsToBuckets = {
75      {0, 1},
76      {1, 2},
77      {2, 4},
78      {4, 7},
79      {6, 11},
80      {9, 17},
81      {13, 26},
82      {20, 40},
83      {31, 61},
84      {46, 92},
85      {70, 139},
86      {105, 209},
87      {157, 314},
88      {236, 472},
89      {355, 709},
90      {532, 1064},
91      {799, 1597},
92      {1198, 2396},
93      {1798, 3595},
94      {2697, 5393},
95      {4045, 8090},
96      {6068, 12136},
97      {9103, 18205},
98      {13654, 27308},
99      {20482, 40963},
100      {30723, 61445},
101      {46084, 92168},
102      {69127, 138253},
103      {103690, 207380},
104      {155536, 311071},
105      {233304, 466607},
106      {349956, 699911},
107      {524934, 1049867},
108      {787401, 1574801},
109      {1181101, 2362202},
110      {1771652, 3543304},
111      {2657479, 5314957},
112      {3986218, 7972436},
113      {5979328, 11958655},
114      {8968992, 17937983},
115      {13453488, 26906975},
116      {20180232, 40360463},
117      {30270348, 60540695},
118      {45405522, 90811043},
119      {68108283, 136216565},
120      {102162424, 204324848},
121      {153243637, 306487273},
122      {229865455, 459730910},
123      {344798183, 689596366},
124      {517197275, 1034394550},
125      {775795913, 1551591826},
126      {1163693870, 2327387740}};
127  auto Entry = StringsToBuckets.lower_bound(NumStrings);
128  assert(Entry != StringsToBuckets.end());
129  return Entry->second;
130}
131
132uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
133  uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field.
134  Size += sizeof(uint32_t) * computeBucketCount(Strings.size());
135
136  return Size;
137}
138
139uint32_t PDBStringTableBuilder::calculateSerializedSize() const {
140  uint32_t Size = 0;
141  Size += sizeof(PDBStringTableHeader);
142  Size += Strings.calculateSerializedSize();
143  Size += calculateHashTableSize();
144  Size += sizeof(uint32_t); // The /names stream ends with the string count.
145  return Size;
146}
147
148void PDBStringTableBuilder::setStrings(
149    const codeview::DebugStringTableSubsection &Strings) {
150  this->Strings = Strings;
151}
152
153Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const {
154  // Write a header
155  PDBStringTableHeader H;
156  H.Signature = PDBStringTableSignature;
157  H.HashVersion = 1;
158  H.ByteSize = Strings.calculateSerializedSize();
159  if (auto EC = Writer.writeObject(H))
160    return EC;
161  assert(Writer.bytesRemaining() == 0);
162  return Error::success();
163}
164
165Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const {
166  if (auto EC = Strings.commit(Writer))
167    return EC;
168
169  assert(Writer.bytesRemaining() == 0);
170  return Error::success();
171}
172
173Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const {
174  // Write a hash table.
175  uint32_t BucketCount = computeBucketCount(Strings.size());
176  if (auto EC = Writer.writeInteger(BucketCount))
177    return EC;
178  std::vector<ulittle32_t> Buckets(BucketCount);
179
180  for (auto &Pair : Strings) {
181    StringRef S = Pair.getKey();
182    uint32_t Offset = Pair.getValue();
183    uint32_t Hash = hashStringV1(S);
184
185    for (uint32_t I = 0; I != BucketCount; ++I) {
186      uint32_t Slot = (Hash + I) % BucketCount;
187      if (Buckets[Slot] != 0)
188        continue;
189      Buckets[Slot] = Offset;
190      break;
191    }
192  }
193
194  if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
195    return EC;
196
197  assert(Writer.bytesRemaining() == 0);
198  return Error::success();
199}
200
201Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
202  if (auto EC = Writer.writeInteger<uint32_t>(Strings.size()))
203    return EC;
204  assert(Writer.bytesRemaining() == 0);
205  return Error::success();
206}
207
208Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
209  BinaryStreamWriter SectionWriter;
210
211  std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader));
212  if (auto EC = writeHeader(SectionWriter))
213    return EC;
214
215  std::tie(SectionWriter, Writer) =
216      Writer.split(Strings.calculateSerializedSize());
217  if (auto EC = writeStrings(SectionWriter))
218    return EC;
219
220  std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize());
221  if (auto EC = writeHashTable(SectionWriter))
222    return EC;
223
224  std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t));
225  if (auto EC = writeEpilogue(SectionWriter))
226    return EC;
227
228  return Error::success();
229}
230