1317760Sdim//===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
2317760Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6317760Sdim//
7317760Sdim//===----------------------------------------------------------------------===//
8317760Sdim
9317760Sdim#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
10317760Sdim
11317760Sdim#include "llvm/ADT/ArrayRef.h"
12317760Sdim#include "llvm/DebugInfo/PDB/Native/Hash.h"
13317760Sdim#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
14317760Sdim#include "llvm/Support/BinaryStreamWriter.h"
15317760Sdim#include "llvm/Support/Endian.h"
16317760Sdim
17341825Sdim#include <map>
18341825Sdim
19317760Sdimusing namespace llvm;
20317760Sdimusing namespace llvm::msf;
21317760Sdimusing namespace llvm::support;
22317760Sdimusing namespace llvm::support::endian;
23317760Sdimusing namespace llvm::pdb;
24317760Sdim
25341825SdimStringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table)
26341825Sdim    : Table(&Table) {}
27341825Sdim
28341825Sdimuint32_t StringTableHashTraits::hashLookupKey(StringRef S) const {
29353358Sdim  // The reference implementation doesn't include code for /src/headerblock
30353358Sdim  // handling, but it can only read natvis entries lld's PDB files if
31353358Sdim  // this hash function truncates the hash to 16 bit.
32353358Sdim  // PDB/include/misc.h in the reference implementation has a hashSz() function
33353358Sdim  // that returns an unsigned short, that seems what's being used for
34353358Sdim  // /src/headerblock.
35353358Sdim  return static_cast<uint16_t>(Table->getIdForString(S));
36341825Sdim}
37341825Sdim
38341825SdimStringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const {
39341825Sdim  return Table->getStringForId(Offset);
40341825Sdim}
41341825Sdim
42341825Sdimuint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) {
43341825Sdim  return Table->insert(S);
44341825Sdim}
45341825Sdim
46317760Sdimuint32_t PDBStringTableBuilder::insert(StringRef S) {
47317760Sdim  return Strings.insert(S);
48317760Sdim}
49317760Sdim
50341825Sdimuint32_t PDBStringTableBuilder::getIdForString(StringRef S) const {
51341825Sdim  return Strings.getIdForString(S);
52341825Sdim}
53341825Sdim
54341825SdimStringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const {
55341825Sdim  return Strings.getStringForId(Id);
56341825Sdim}
57341825Sdim
58317760Sdimstatic uint32_t computeBucketCount(uint32_t NumStrings) {
59353358Sdim  // This is a precomputed list of Buckets given the specified number of
60353358Sdim  // strings.  Matching the reference algorithm exactly is not strictly
61353358Sdim  // necessary for correctness, but it helps when comparing LLD's PDBs with
62353358Sdim  // Microsoft's PDBs so as to eliminate superfluous differences.
63353358Sdim  // The reference implementation does (in nmt.h, NMT::grow()):
64353358Sdim  //   unsigned StringCount = 0;
65353358Sdim  //   unsigned BucketCount = 1;
66353358Sdim  //   fn insert() {
67353358Sdim  //     ++StringCount;
68353358Sdim  //     if (BucketCount * 3 / 4 < StringCount)
69353358Sdim  //       BucketCount = BucketCount * 3 / 2 + 1;
70353358Sdim  //   }
71353358Sdim  // This list contains all StringCount, BucketCount pairs where BucketCount was
72353358Sdim  // just incremented.  It ends before the first BucketCount entry where
73353358Sdim  // BucketCount * 3 would overflow a 32-bit unsigned int.
74353358Sdim  static std::map<uint32_t, uint32_t> StringsToBuckets = {
75353358Sdim      {0, 1},
76353358Sdim      {1, 2},
77353358Sdim      {2, 4},
78353358Sdim      {4, 7},
79353358Sdim      {6, 11},
80353358Sdim      {9, 17},
81353358Sdim      {13, 26},
82353358Sdim      {20, 40},
83353358Sdim      {31, 61},
84353358Sdim      {46, 92},
85353358Sdim      {70, 139},
86353358Sdim      {105, 209},
87353358Sdim      {157, 314},
88353358Sdim      {236, 472},
89353358Sdim      {355, 709},
90353358Sdim      {532, 1064},
91353358Sdim      {799, 1597},
92353358Sdim      {1198, 2396},
93353358Sdim      {1798, 3595},
94353358Sdim      {2697, 5393},
95353358Sdim      {4045, 8090},
96353358Sdim      {6068, 12136},
97353358Sdim      {9103, 18205},
98353358Sdim      {13654, 27308},
99353358Sdim      {20482, 40963},
100353358Sdim      {30723, 61445},
101353358Sdim      {46084, 92168},
102353358Sdim      {69127, 138253},
103353358Sdim      {103690, 207380},
104353358Sdim      {155536, 311071},
105353358Sdim      {233304, 466607},
106353358Sdim      {349956, 699911},
107353358Sdim      {524934, 1049867},
108353358Sdim      {787401, 1574801},
109353358Sdim      {1181101, 2362202},
110353358Sdim      {1771652, 3543304},
111353358Sdim      {2657479, 5314957},
112353358Sdim      {3986218, 7972436},
113353358Sdim      {5979328, 11958655},
114353358Sdim      {8968992, 17937983},
115353358Sdim      {13453488, 26906975},
116353358Sdim      {20180232, 40360463},
117353358Sdim      {30270348, 60540695},
118353358Sdim      {45405522, 90811043},
119353358Sdim      {68108283, 136216565},
120353358Sdim      {102162424, 204324848},
121353358Sdim      {153243637, 306487273},
122353358Sdim      {229865455, 459730910},
123353358Sdim      {344798183, 689596366},
124353358Sdim      {517197275, 1034394550},
125353358Sdim      {775795913, 1551591826},
126353358Sdim      {1163693870, 2327387740}};
127341825Sdim  auto Entry = StringsToBuckets.lower_bound(NumStrings);
128341825Sdim  assert(Entry != StringsToBuckets.end());
129341825Sdim  return Entry->second;
130317760Sdim}
131317760Sdim
132317760Sdimuint32_t PDBStringTableBuilder::calculateHashTableSize() const {
133317760Sdim  uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field.
134317760Sdim  Size += sizeof(uint32_t) * computeBucketCount(Strings.size());
135317760Sdim
136317760Sdim  return Size;
137317760Sdim}
138317760Sdim
139317760Sdimuint32_t PDBStringTableBuilder::calculateSerializedSize() const {
140317760Sdim  uint32_t Size = 0;
141317760Sdim  Size += sizeof(PDBStringTableHeader);
142317760Sdim  Size += Strings.calculateSerializedSize();
143317760Sdim  Size += calculateHashTableSize();
144317760Sdim  Size += sizeof(uint32_t); // The /names stream ends with the string count.
145317760Sdim  return Size;
146317760Sdim}
147317760Sdim
148320041Sdimvoid PDBStringTableBuilder::setStrings(
149320041Sdim    const codeview::DebugStringTableSubsection &Strings) {
150320041Sdim  this->Strings = Strings;
151320041Sdim}
152320041Sdim
153317760SdimError PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const {
154317760Sdim  // Write a header
155317760Sdim  PDBStringTableHeader H;
156317760Sdim  H.Signature = PDBStringTableSignature;
157317760Sdim  H.HashVersion = 1;
158317760Sdim  H.ByteSize = Strings.calculateSerializedSize();
159317760Sdim  if (auto EC = Writer.writeObject(H))
160317760Sdim    return EC;
161317760Sdim  assert(Writer.bytesRemaining() == 0);
162317760Sdim  return Error::success();
163317760Sdim}
164317760Sdim
165317760SdimError PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const {
166317760Sdim  if (auto EC = Strings.commit(Writer))
167317760Sdim    return EC;
168317760Sdim
169317760Sdim  assert(Writer.bytesRemaining() == 0);
170317760Sdim  return Error::success();
171317760Sdim}
172317760Sdim
173317760SdimError PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const {
174317760Sdim  // Write a hash table.
175317760Sdim  uint32_t BucketCount = computeBucketCount(Strings.size());
176317760Sdim  if (auto EC = Writer.writeInteger(BucketCount))
177317760Sdim    return EC;
178317760Sdim  std::vector<ulittle32_t> Buckets(BucketCount);
179317760Sdim
180317760Sdim  for (auto &Pair : Strings) {
181317760Sdim    StringRef S = Pair.getKey();
182317760Sdim    uint32_t Offset = Pair.getValue();
183317760Sdim    uint32_t Hash = hashStringV1(S);
184317760Sdim
185317760Sdim    for (uint32_t I = 0; I != BucketCount; ++I) {
186317760Sdim      uint32_t Slot = (Hash + I) % BucketCount;
187317760Sdim      if (Buckets[Slot] != 0)
188317760Sdim        continue;
189317760Sdim      Buckets[Slot] = Offset;
190317760Sdim      break;
191317760Sdim    }
192317760Sdim  }
193317760Sdim
194317760Sdim  if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
195317760Sdim    return EC;
196317760Sdim
197317760Sdim  assert(Writer.bytesRemaining() == 0);
198317760Sdim  return Error::success();
199317760Sdim}
200317760Sdim
201317760SdimError PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
202317760Sdim  if (auto EC = Writer.writeInteger<uint32_t>(Strings.size()))
203317760Sdim    return EC;
204317760Sdim  assert(Writer.bytesRemaining() == 0);
205317760Sdim  return Error::success();
206317760Sdim}
207317760Sdim
208317760SdimError PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
209317760Sdim  BinaryStreamWriter SectionWriter;
210317760Sdim
211317760Sdim  std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader));
212317760Sdim  if (auto EC = writeHeader(SectionWriter))
213317760Sdim    return EC;
214317760Sdim
215317760Sdim  std::tie(SectionWriter, Writer) =
216317760Sdim      Writer.split(Strings.calculateSerializedSize());
217317760Sdim  if (auto EC = writeStrings(SectionWriter))
218317760Sdim    return EC;
219317760Sdim
220317760Sdim  std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize());
221317760Sdim  if (auto EC = writeHashTable(SectionWriter))
222317760Sdim    return EC;
223317760Sdim
224317760Sdim  std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t));
225317760Sdim  if (auto EC = writeEpilogue(SectionWriter))
226317760Sdim    return EC;
227317760Sdim
228317760Sdim  return Error::success();
229317760Sdim}
230