1317760Sdim//===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===// 2317760Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6317760Sdim// 7317760Sdim//===----------------------------------------------------------------------===// 8317760Sdim 9317760Sdim#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" 10317760Sdim 11317760Sdim#include "llvm/ADT/ArrayRef.h" 12317760Sdim#include "llvm/DebugInfo/PDB/Native/Hash.h" 13317760Sdim#include "llvm/DebugInfo/PDB/Native/RawTypes.h" 14317760Sdim#include "llvm/Support/BinaryStreamWriter.h" 15317760Sdim#include "llvm/Support/Endian.h" 16317760Sdim 17341825Sdim#include <map> 18341825Sdim 19317760Sdimusing namespace llvm; 20317760Sdimusing namespace llvm::msf; 21317760Sdimusing namespace llvm::support; 22317760Sdimusing namespace llvm::support::endian; 23317760Sdimusing namespace llvm::pdb; 24317760Sdim 25341825SdimStringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table) 26341825Sdim : Table(&Table) {} 27341825Sdim 28341825Sdimuint32_t StringTableHashTraits::hashLookupKey(StringRef S) const { 29353358Sdim // The reference implementation doesn't include code for /src/headerblock 30353358Sdim // handling, but it can only read natvis entries lld's PDB files if 31353358Sdim // this hash function truncates the hash to 16 bit. 32353358Sdim // PDB/include/misc.h in the reference implementation has a hashSz() function 33353358Sdim // that returns an unsigned short, that seems what's being used for 34353358Sdim // /src/headerblock. 35353358Sdim return static_cast<uint16_t>(Table->getIdForString(S)); 36341825Sdim} 37341825Sdim 38341825SdimStringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const { 39341825Sdim return Table->getStringForId(Offset); 40341825Sdim} 41341825Sdim 42341825Sdimuint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) { 43341825Sdim return Table->insert(S); 44341825Sdim} 45341825Sdim 46317760Sdimuint32_t PDBStringTableBuilder::insert(StringRef S) { 47317760Sdim return Strings.insert(S); 48317760Sdim} 49317760Sdim 50341825Sdimuint32_t PDBStringTableBuilder::getIdForString(StringRef S) const { 51341825Sdim return Strings.getIdForString(S); 52341825Sdim} 53341825Sdim 54341825SdimStringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const { 55341825Sdim return Strings.getStringForId(Id); 56341825Sdim} 57341825Sdim 58317760Sdimstatic uint32_t computeBucketCount(uint32_t NumStrings) { 59353358Sdim // This is a precomputed list of Buckets given the specified number of 60353358Sdim // strings. Matching the reference algorithm exactly is not strictly 61353358Sdim // necessary for correctness, but it helps when comparing LLD's PDBs with 62353358Sdim // Microsoft's PDBs so as to eliminate superfluous differences. 63353358Sdim // The reference implementation does (in nmt.h, NMT::grow()): 64353358Sdim // unsigned StringCount = 0; 65353358Sdim // unsigned BucketCount = 1; 66353358Sdim // fn insert() { 67353358Sdim // ++StringCount; 68353358Sdim // if (BucketCount * 3 / 4 < StringCount) 69353358Sdim // BucketCount = BucketCount * 3 / 2 + 1; 70353358Sdim // } 71353358Sdim // This list contains all StringCount, BucketCount pairs where BucketCount was 72353358Sdim // just incremented. It ends before the first BucketCount entry where 73353358Sdim // BucketCount * 3 would overflow a 32-bit unsigned int. 74353358Sdim static std::map<uint32_t, uint32_t> StringsToBuckets = { 75353358Sdim {0, 1}, 76353358Sdim {1, 2}, 77353358Sdim {2, 4}, 78353358Sdim {4, 7}, 79353358Sdim {6, 11}, 80353358Sdim {9, 17}, 81353358Sdim {13, 26}, 82353358Sdim {20, 40}, 83353358Sdim {31, 61}, 84353358Sdim {46, 92}, 85353358Sdim {70, 139}, 86353358Sdim {105, 209}, 87353358Sdim {157, 314}, 88353358Sdim {236, 472}, 89353358Sdim {355, 709}, 90353358Sdim {532, 1064}, 91353358Sdim {799, 1597}, 92353358Sdim {1198, 2396}, 93353358Sdim {1798, 3595}, 94353358Sdim {2697, 5393}, 95353358Sdim {4045, 8090}, 96353358Sdim {6068, 12136}, 97353358Sdim {9103, 18205}, 98353358Sdim {13654, 27308}, 99353358Sdim {20482, 40963}, 100353358Sdim {30723, 61445}, 101353358Sdim {46084, 92168}, 102353358Sdim {69127, 138253}, 103353358Sdim {103690, 207380}, 104353358Sdim {155536, 311071}, 105353358Sdim {233304, 466607}, 106353358Sdim {349956, 699911}, 107353358Sdim {524934, 1049867}, 108353358Sdim {787401, 1574801}, 109353358Sdim {1181101, 2362202}, 110353358Sdim {1771652, 3543304}, 111353358Sdim {2657479, 5314957}, 112353358Sdim {3986218, 7972436}, 113353358Sdim {5979328, 11958655}, 114353358Sdim {8968992, 17937983}, 115353358Sdim {13453488, 26906975}, 116353358Sdim {20180232, 40360463}, 117353358Sdim {30270348, 60540695}, 118353358Sdim {45405522, 90811043}, 119353358Sdim {68108283, 136216565}, 120353358Sdim {102162424, 204324848}, 121353358Sdim {153243637, 306487273}, 122353358Sdim {229865455, 459730910}, 123353358Sdim {344798183, 689596366}, 124353358Sdim {517197275, 1034394550}, 125353358Sdim {775795913, 1551591826}, 126353358Sdim {1163693870, 2327387740}}; 127341825Sdim auto Entry = StringsToBuckets.lower_bound(NumStrings); 128341825Sdim assert(Entry != StringsToBuckets.end()); 129341825Sdim return Entry->second; 130317760Sdim} 131317760Sdim 132317760Sdimuint32_t PDBStringTableBuilder::calculateHashTableSize() const { 133317760Sdim uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field. 134317760Sdim Size += sizeof(uint32_t) * computeBucketCount(Strings.size()); 135317760Sdim 136317760Sdim return Size; 137317760Sdim} 138317760Sdim 139317760Sdimuint32_t PDBStringTableBuilder::calculateSerializedSize() const { 140317760Sdim uint32_t Size = 0; 141317760Sdim Size += sizeof(PDBStringTableHeader); 142317760Sdim Size += Strings.calculateSerializedSize(); 143317760Sdim Size += calculateHashTableSize(); 144317760Sdim Size += sizeof(uint32_t); // The /names stream ends with the string count. 145317760Sdim return Size; 146317760Sdim} 147317760Sdim 148320041Sdimvoid PDBStringTableBuilder::setStrings( 149320041Sdim const codeview::DebugStringTableSubsection &Strings) { 150320041Sdim this->Strings = Strings; 151320041Sdim} 152320041Sdim 153317760SdimError PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const { 154317760Sdim // Write a header 155317760Sdim PDBStringTableHeader H; 156317760Sdim H.Signature = PDBStringTableSignature; 157317760Sdim H.HashVersion = 1; 158317760Sdim H.ByteSize = Strings.calculateSerializedSize(); 159317760Sdim if (auto EC = Writer.writeObject(H)) 160317760Sdim return EC; 161317760Sdim assert(Writer.bytesRemaining() == 0); 162317760Sdim return Error::success(); 163317760Sdim} 164317760Sdim 165317760SdimError PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const { 166317760Sdim if (auto EC = Strings.commit(Writer)) 167317760Sdim return EC; 168317760Sdim 169317760Sdim assert(Writer.bytesRemaining() == 0); 170317760Sdim return Error::success(); 171317760Sdim} 172317760Sdim 173317760SdimError PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const { 174317760Sdim // Write a hash table. 175317760Sdim uint32_t BucketCount = computeBucketCount(Strings.size()); 176317760Sdim if (auto EC = Writer.writeInteger(BucketCount)) 177317760Sdim return EC; 178317760Sdim std::vector<ulittle32_t> Buckets(BucketCount); 179317760Sdim 180317760Sdim for (auto &Pair : Strings) { 181317760Sdim StringRef S = Pair.getKey(); 182317760Sdim uint32_t Offset = Pair.getValue(); 183317760Sdim uint32_t Hash = hashStringV1(S); 184317760Sdim 185317760Sdim for (uint32_t I = 0; I != BucketCount; ++I) { 186317760Sdim uint32_t Slot = (Hash + I) % BucketCount; 187317760Sdim if (Buckets[Slot] != 0) 188317760Sdim continue; 189317760Sdim Buckets[Slot] = Offset; 190317760Sdim break; 191317760Sdim } 192317760Sdim } 193317760Sdim 194317760Sdim if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets))) 195317760Sdim return EC; 196317760Sdim 197317760Sdim assert(Writer.bytesRemaining() == 0); 198317760Sdim return Error::success(); 199317760Sdim} 200317760Sdim 201317760SdimError PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const { 202317760Sdim if (auto EC = Writer.writeInteger<uint32_t>(Strings.size())) 203317760Sdim return EC; 204317760Sdim assert(Writer.bytesRemaining() == 0); 205317760Sdim return Error::success(); 206317760Sdim} 207317760Sdim 208317760SdimError PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const { 209317760Sdim BinaryStreamWriter SectionWriter; 210317760Sdim 211317760Sdim std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader)); 212317760Sdim if (auto EC = writeHeader(SectionWriter)) 213317760Sdim return EC; 214317760Sdim 215317760Sdim std::tie(SectionWriter, Writer) = 216317760Sdim Writer.split(Strings.calculateSerializedSize()); 217317760Sdim if (auto EC = writeStrings(SectionWriter)) 218317760Sdim return EC; 219317760Sdim 220317760Sdim std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize()); 221317760Sdim if (auto EC = writeHashTable(SectionWriter)) 222317760Sdim return EC; 223317760Sdim 224317760Sdim std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t)); 225317760Sdim if (auto EC = writeEpilogue(SectionWriter)) 226317760Sdim return EC; 227317760Sdim 228317760Sdim return Error::success(); 229317760Sdim} 230