1317017Sdim//===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===// 2317017Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6317017Sdim// 7317017Sdim//===----------------------------------------------------------------------===// 8317017Sdim 9320572Sdim#ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H 10320572Sdim#define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H 11317017Sdim 12317017Sdim#include "llvm/ADT/SparseBitVector.h" 13317017Sdim#include "llvm/ADT/iterator.h" 14341825Sdim#include "llvm/DebugInfo/PDB/Native/RawError.h" 15341825Sdim#include "llvm/Support/BinaryStreamReader.h" 16341825Sdim#include "llvm/Support/BinaryStreamWriter.h" 17317017Sdim#include "llvm/Support/Endian.h" 18317017Sdim#include "llvm/Support/Error.h" 19317017Sdim#include <cstdint> 20320572Sdim#include <iterator> 21317017Sdim#include <utility> 22320572Sdim#include <vector> 23317017Sdim 24317017Sdimnamespace llvm { 25320572Sdim 26320572Sdimclass BinaryStreamReader; 27320572Sdimclass BinaryStreamWriter; 28320572Sdim 29317017Sdimnamespace pdb { 30317017Sdim 31341825SdimError readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V); 32341825SdimError writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec); 33317017Sdim 34353358Sdimtemplate <typename ValueT> class HashTable; 35341825Sdim 36353358Sdimtemplate <typename ValueT> 37341825Sdimclass HashTableIterator 38353358Sdim : public iterator_facade_base<HashTableIterator<ValueT>, 39341825Sdim std::forward_iterator_tag, 40353358Sdim const std::pair<uint32_t, ValueT>> { 41353358Sdim friend HashTable<ValueT>; 42341825Sdim 43353358Sdim HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index, 44341825Sdim bool IsEnd) 45341825Sdim : Map(&Map), Index(Index), IsEnd(IsEnd) {} 46341825Sdim 47341825Sdimpublic: 48353358Sdim HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) { 49341825Sdim int I = Map.Present.find_first(); 50341825Sdim if (I == -1) { 51341825Sdim Index = 0; 52341825Sdim IsEnd = true; 53341825Sdim } else { 54341825Sdim Index = static_cast<uint32_t>(I); 55341825Sdim IsEnd = false; 56341825Sdim } 57341825Sdim } 58341825Sdim 59360784Sdim HashTableIterator(const HashTableIterator &R) = default; 60341825Sdim HashTableIterator &operator=(const HashTableIterator &R) { 61341825Sdim Map = R.Map; 62341825Sdim return *this; 63341825Sdim } 64341825Sdim bool operator==(const HashTableIterator &R) const { 65341825Sdim if (IsEnd && R.IsEnd) 66341825Sdim return true; 67341825Sdim if (IsEnd != R.IsEnd) 68341825Sdim return false; 69341825Sdim 70341825Sdim return (Map == R.Map) && (Index == R.Index); 71341825Sdim } 72341825Sdim const std::pair<uint32_t, ValueT> &operator*() const { 73341825Sdim assert(Map->Present.test(Index)); 74341825Sdim return Map->Buckets[Index]; 75341825Sdim } 76353358Sdim 77353358Sdim // Implement postfix op++ in terms of prefix op++ by using the superclass 78353358Sdim // implementation. 79353358Sdim using iterator_facade_base<HashTableIterator<ValueT>, 80353358Sdim std::forward_iterator_tag, 81353358Sdim const std::pair<uint32_t, ValueT>>::operator++; 82341825Sdim HashTableIterator &operator++() { 83341825Sdim while (Index < Map->Buckets.size()) { 84341825Sdim ++Index; 85341825Sdim if (Map->Present.test(Index)) 86341825Sdim return *this; 87341825Sdim } 88341825Sdim 89341825Sdim IsEnd = true; 90341825Sdim return *this; 91341825Sdim } 92341825Sdim 93341825Sdimprivate: 94341825Sdim bool isEnd() const { return IsEnd; } 95341825Sdim uint32_t index() const { return Index; } 96341825Sdim 97353358Sdim const HashTable<ValueT> *Map; 98341825Sdim uint32_t Index; 99341825Sdim bool IsEnd; 100341825Sdim}; 101341825Sdim 102353358Sdimtemplate <typename ValueT> 103317017Sdimclass HashTable { 104317017Sdim struct Header { 105317017Sdim support::ulittle32_t Size; 106317017Sdim support::ulittle32_t Capacity; 107317017Sdim }; 108317017Sdim 109341825Sdim using BucketList = std::vector<std::pair<uint32_t, ValueT>>; 110317017Sdim 111317017Sdimpublic: 112353358Sdim using const_iterator = HashTableIterator<ValueT>; 113353358Sdim friend const_iterator; 114353358Sdim 115341825Sdim HashTable() { Buckets.resize(8); } 116353358Sdim explicit HashTable(uint32_t Capacity) { 117341825Sdim Buckets.resize(Capacity); 118341825Sdim } 119317017Sdim 120341825Sdim Error load(BinaryStreamReader &Stream) { 121341825Sdim const Header *H; 122341825Sdim if (auto EC = Stream.readObject(H)) 123341825Sdim return EC; 124341825Sdim if (H->Capacity == 0) 125341825Sdim return make_error<RawError>(raw_error_code::corrupt_file, 126341825Sdim "Invalid Hash Table Capacity"); 127341825Sdim if (H->Size > maxLoad(H->Capacity)) 128341825Sdim return make_error<RawError>(raw_error_code::corrupt_file, 129341825Sdim "Invalid Hash Table Size"); 130317017Sdim 131341825Sdim Buckets.resize(H->Capacity); 132317017Sdim 133341825Sdim if (auto EC = readSparseBitVector(Stream, Present)) 134341825Sdim return EC; 135341825Sdim if (Present.count() != H->Size) 136341825Sdim return make_error<RawError>(raw_error_code::corrupt_file, 137341825Sdim "Present bit vector does not match size!"); 138317017Sdim 139341825Sdim if (auto EC = readSparseBitVector(Stream, Deleted)) 140341825Sdim return EC; 141341825Sdim if (Present.intersects(Deleted)) 142341825Sdim return make_error<RawError>(raw_error_code::corrupt_file, 143353358Sdim "Present bit vector intersects deleted!"); 144317017Sdim 145341825Sdim for (uint32_t P : Present) { 146341825Sdim if (auto EC = Stream.readInteger(Buckets[P].first)) 147341825Sdim return EC; 148341825Sdim const ValueT *Value; 149341825Sdim if (auto EC = Stream.readObject(Value)) 150341825Sdim return EC; 151341825Sdim Buckets[P].second = *Value; 152341825Sdim } 153317017Sdim 154341825Sdim return Error::success(); 155341825Sdim } 156341825Sdim 157341825Sdim uint32_t calculateSerializedLength() const { 158341825Sdim uint32_t Size = sizeof(Header); 159341825Sdim 160341825Sdim constexpr int BitsPerWord = 8 * sizeof(uint32_t); 161341825Sdim 162341825Sdim int NumBitsP = Present.find_last() + 1; 163341825Sdim int NumBitsD = Deleted.find_last() + 1; 164341825Sdim 165341825Sdim uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord; 166341825Sdim uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord; 167341825Sdim 168341825Sdim // Present bit set number of words (4 bytes), followed by that many actual 169341825Sdim // words (4 bytes each). 170341825Sdim Size += sizeof(uint32_t); 171341825Sdim Size += NumWordsP * sizeof(uint32_t); 172341825Sdim 173341825Sdim // Deleted bit set number of words (4 bytes), followed by that many actual 174341825Sdim // words (4 bytes each). 175341825Sdim Size += sizeof(uint32_t); 176341825Sdim Size += NumWordsD * sizeof(uint32_t); 177341825Sdim 178341825Sdim // One (Key, ValueT) pair for each entry Present. 179341825Sdim Size += (sizeof(uint32_t) + sizeof(ValueT)) * size(); 180341825Sdim 181341825Sdim return Size; 182341825Sdim } 183341825Sdim 184341825Sdim Error commit(BinaryStreamWriter &Writer) const { 185341825Sdim Header H; 186341825Sdim H.Size = size(); 187341825Sdim H.Capacity = capacity(); 188341825Sdim if (auto EC = Writer.writeObject(H)) 189341825Sdim return EC; 190341825Sdim 191341825Sdim if (auto EC = writeSparseBitVector(Writer, Present)) 192341825Sdim return EC; 193341825Sdim 194341825Sdim if (auto EC = writeSparseBitVector(Writer, Deleted)) 195341825Sdim return EC; 196341825Sdim 197341825Sdim for (const auto &Entry : *this) { 198341825Sdim if (auto EC = Writer.writeInteger(Entry.first)) 199341825Sdim return EC; 200341825Sdim if (auto EC = Writer.writeObject(Entry.second)) 201341825Sdim return EC; 202341825Sdim } 203341825Sdim return Error::success(); 204341825Sdim } 205341825Sdim 206341825Sdim void clear() { 207341825Sdim Buckets.resize(8); 208341825Sdim Present.clear(); 209341825Sdim Deleted.clear(); 210341825Sdim } 211341825Sdim 212341825Sdim bool empty() const { return size() == 0; } 213341825Sdim uint32_t capacity() const { return Buckets.size(); } 214341825Sdim uint32_t size() const { return Present.count(); } 215341825Sdim 216353358Sdim const_iterator begin() const { return const_iterator(*this); } 217353358Sdim const_iterator end() const { return const_iterator(*this, 0, true); } 218341825Sdim 219341825Sdim /// Find the entry whose key has the specified hash value, using the specified 220341825Sdim /// traits defining hash function and equality. 221353358Sdim template <typename Key, typename TraitsT> 222353358Sdim const_iterator find_as(const Key &K, TraitsT &Traits) const { 223341825Sdim uint32_t H = Traits.hashLookupKey(K) % capacity(); 224341825Sdim uint32_t I = H; 225341825Sdim Optional<uint32_t> FirstUnused; 226341825Sdim do { 227341825Sdim if (isPresent(I)) { 228341825Sdim if (Traits.storageKeyToLookupKey(Buckets[I].first) == K) 229353358Sdim return const_iterator(*this, I, false); 230341825Sdim } else { 231341825Sdim if (!FirstUnused) 232341825Sdim FirstUnused = I; 233341825Sdim // Insertion occurs via linear probing from the slot hint, and will be 234341825Sdim // inserted at the first empty / deleted location. Therefore, if we are 235341825Sdim // probing and find a location that is neither present nor deleted, then 236341825Sdim // nothing must have EVER been inserted at this location, and thus it is 237341825Sdim // not possible for a matching value to occur later. 238341825Sdim if (!isDeleted(I)) 239341825Sdim break; 240341825Sdim } 241341825Sdim I = (I + 1) % capacity(); 242341825Sdim } while (I != H); 243341825Sdim 244341825Sdim // The only way FirstUnused would not be set is if every single entry in the 245341825Sdim // table were Present. But this would violate the load factor constraints 246341825Sdim // that we impose, so it should never happen. 247341825Sdim assert(FirstUnused); 248353358Sdim return const_iterator(*this, *FirstUnused, true); 249341825Sdim } 250341825Sdim 251341825Sdim /// Set the entry using a key type that the specified Traits can convert 252341825Sdim /// from a real key to an internal key. 253353358Sdim template <typename Key, typename TraitsT> 254353358Sdim bool set_as(const Key &K, ValueT V, TraitsT &Traits) { 255353358Sdim return set_as_internal(K, std::move(V), Traits, None); 256341825Sdim } 257341825Sdim 258353358Sdim template <typename Key, typename TraitsT> 259353358Sdim ValueT get(const Key &K, TraitsT &Traits) const { 260353358Sdim auto Iter = find_as(K, Traits); 261341825Sdim assert(Iter != end()); 262341825Sdim return (*Iter).second; 263341825Sdim } 264341825Sdim 265317017Sdimprotected: 266317017Sdim bool isPresent(uint32_t K) const { return Present.test(K); } 267317017Sdim bool isDeleted(uint32_t K) const { return Deleted.test(K); } 268320572Sdim 269317017Sdim BucketList Buckets; 270317017Sdim mutable SparseBitVector<> Present; 271317017Sdim mutable SparseBitVector<> Deleted; 272317017Sdim 273317017Sdimprivate: 274341825Sdim /// Set the entry using a key type that the specified Traits can convert 275341825Sdim /// from a real key to an internal key. 276353358Sdim template <typename Key, typename TraitsT> 277353358Sdim bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits, 278353358Sdim Optional<uint32_t> InternalKey) { 279353358Sdim auto Entry = find_as(K, Traits); 280341825Sdim if (Entry != end()) { 281341825Sdim assert(isPresent(Entry.index())); 282341825Sdim assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K); 283341825Sdim // We're updating, no need to do anything special. 284341825Sdim Buckets[Entry.index()].second = V; 285341825Sdim return false; 286341825Sdim } 287317017Sdim 288341825Sdim auto &B = Buckets[Entry.index()]; 289341825Sdim assert(!isPresent(Entry.index())); 290341825Sdim assert(Entry.isEnd()); 291341825Sdim B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K); 292341825Sdim B.second = V; 293341825Sdim Present.set(Entry.index()); 294341825Sdim Deleted.reset(Entry.index()); 295317017Sdim 296353358Sdim grow(Traits); 297320572Sdim 298353358Sdim assert((find_as(K, Traits)) != end()); 299341825Sdim return true; 300341825Sdim } 301317017Sdim 302341825Sdim static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; } 303317017Sdim 304353358Sdim template <typename TraitsT> 305353358Sdim void grow(TraitsT &Traits) { 306341825Sdim uint32_t S = size(); 307341825Sdim uint32_t MaxLoad = maxLoad(capacity()); 308341825Sdim if (S < maxLoad(capacity())) 309341825Sdim return; 310341825Sdim assert(capacity() != UINT32_MAX && "Can't grow Hash table!"); 311317017Sdim 312341825Sdim uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX; 313317017Sdim 314341825Sdim // Growing requires rebuilding the table and re-hashing every item. Make a 315341825Sdim // copy with a larger capacity, insert everything into the copy, then swap 316341825Sdim // it in. 317353358Sdim HashTable NewMap(NewCapacity); 318341825Sdim for (auto I : Present) { 319341825Sdim auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first); 320353358Sdim NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits, 321353358Sdim Buckets[I].first); 322341825Sdim } 323341825Sdim 324341825Sdim Buckets.swap(NewMap.Buckets); 325341825Sdim std::swap(Present, NewMap.Present); 326341825Sdim std::swap(Deleted, NewMap.Deleted); 327341825Sdim assert(capacity() == NewCapacity); 328341825Sdim assert(size() == S); 329341825Sdim } 330317017Sdim}; 331317017Sdim 332317017Sdim} // end namespace pdb 333320572Sdim 334317017Sdim} // end namespace llvm 335317017Sdim 336320572Sdim#endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H 337