1//===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9#ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H 10#define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H 11 12#include "llvm/ADT/SparseBitVector.h" 13#include "llvm/ADT/iterator.h" 14#include "llvm/DebugInfo/PDB/Native/RawError.h" 15#include "llvm/Support/BinaryStreamReader.h" 16#include "llvm/Support/BinaryStreamWriter.h" 17#include "llvm/Support/Endian.h" 18#include "llvm/Support/Error.h" 19#include <cstdint> 20#include <iterator> 21#include <utility> 22#include <vector> 23 24namespace llvm { 25 26class BinaryStreamReader; 27class BinaryStreamWriter; 28 29namespace pdb { 30 31Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V); 32Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec); 33 34template <typename ValueT> class HashTable; 35 36template <typename ValueT> 37class HashTableIterator 38 : public iterator_facade_base<HashTableIterator<ValueT>, 39 std::forward_iterator_tag, 40 const std::pair<uint32_t, ValueT>> { 41 friend HashTable<ValueT>; 42 43 HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index, 44 bool IsEnd) 45 : Map(&Map), Index(Index), IsEnd(IsEnd) {} 46 47public: 48 HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) { 49 int I = Map.Present.find_first(); 50 if (I == -1) { 51 Index = 0; 52 IsEnd = true; 53 } else { 54 Index = static_cast<uint32_t>(I); 55 IsEnd = false; 56 } 57 } 58 59 HashTableIterator(const HashTableIterator &R) = default; 60 HashTableIterator &operator=(const HashTableIterator &R) { 61 Map = R.Map; 62 return *this; 63 } 64 bool operator==(const HashTableIterator &R) const { 65 if (IsEnd && R.IsEnd) 66 return true; 67 if (IsEnd != R.IsEnd) 68 return false; 69 70 return (Map == R.Map) && (Index == R.Index); 71 } 72 const std::pair<uint32_t, ValueT> &operator*() const { 73 assert(Map->Present.test(Index)); 74 return Map->Buckets[Index]; 75 } 76 77 // Implement postfix op++ in terms of prefix op++ by using the superclass 78 // implementation. 79 using iterator_facade_base<HashTableIterator<ValueT>, 80 std::forward_iterator_tag, 81 const std::pair<uint32_t, ValueT>>::operator++; 82 HashTableIterator &operator++() { 83 while (Index < Map->Buckets.size()) { 84 ++Index; 85 if (Map->Present.test(Index)) 86 return *this; 87 } 88 89 IsEnd = true; 90 return *this; 91 } 92 93private: 94 bool isEnd() const { return IsEnd; } 95 uint32_t index() const { return Index; } 96 97 const HashTable<ValueT> *Map; 98 uint32_t Index; 99 bool IsEnd; 100}; 101 102template <typename ValueT> 103class HashTable { 104 struct Header { 105 support::ulittle32_t Size; 106 support::ulittle32_t Capacity; 107 }; 108 109 using BucketList = std::vector<std::pair<uint32_t, ValueT>>; 110 111public: 112 using const_iterator = HashTableIterator<ValueT>; 113 friend const_iterator; 114 115 HashTable() { Buckets.resize(8); } 116 explicit HashTable(uint32_t Capacity) { 117 Buckets.resize(Capacity); 118 } 119 120 Error load(BinaryStreamReader &Stream) { 121 const Header *H; 122 if (auto EC = Stream.readObject(H)) 123 return EC; 124 if (H->Capacity == 0) 125 return make_error<RawError>(raw_error_code::corrupt_file, 126 "Invalid Hash Table Capacity"); 127 if (H->Size > maxLoad(H->Capacity)) 128 return make_error<RawError>(raw_error_code::corrupt_file, 129 "Invalid Hash Table Size"); 130 131 Buckets.resize(H->Capacity); 132 133 if (auto EC = readSparseBitVector(Stream, Present)) 134 return EC; 135 if (Present.count() != H->Size) 136 return make_error<RawError>(raw_error_code::corrupt_file, 137 "Present bit vector does not match size!"); 138 139 if (auto EC = readSparseBitVector(Stream, Deleted)) 140 return EC; 141 if (Present.intersects(Deleted)) 142 return make_error<RawError>(raw_error_code::corrupt_file, 143 "Present bit vector intersects deleted!"); 144 145 for (uint32_t P : Present) { 146 if (auto EC = Stream.readInteger(Buckets[P].first)) 147 return EC; 148 const ValueT *Value; 149 if (auto EC = Stream.readObject(Value)) 150 return EC; 151 Buckets[P].second = *Value; 152 } 153 154 return Error::success(); 155 } 156 157 uint32_t calculateSerializedLength() const { 158 uint32_t Size = sizeof(Header); 159 160 constexpr int BitsPerWord = 8 * sizeof(uint32_t); 161 162 int NumBitsP = Present.find_last() + 1; 163 int NumBitsD = Deleted.find_last() + 1; 164 165 uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord; 166 uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord; 167 168 // Present bit set number of words (4 bytes), followed by that many actual 169 // words (4 bytes each). 170 Size += sizeof(uint32_t); 171 Size += NumWordsP * sizeof(uint32_t); 172 173 // Deleted bit set number of words (4 bytes), followed by that many actual 174 // words (4 bytes each). 175 Size += sizeof(uint32_t); 176 Size += NumWordsD * sizeof(uint32_t); 177 178 // One (Key, ValueT) pair for each entry Present. 179 Size += (sizeof(uint32_t) + sizeof(ValueT)) * size(); 180 181 return Size; 182 } 183 184 Error commit(BinaryStreamWriter &Writer) const { 185 Header H; 186 H.Size = size(); 187 H.Capacity = capacity(); 188 if (auto EC = Writer.writeObject(H)) 189 return EC; 190 191 if (auto EC = writeSparseBitVector(Writer, Present)) 192 return EC; 193 194 if (auto EC = writeSparseBitVector(Writer, Deleted)) 195 return EC; 196 197 for (const auto &Entry : *this) { 198 if (auto EC = Writer.writeInteger(Entry.first)) 199 return EC; 200 if (auto EC = Writer.writeObject(Entry.second)) 201 return EC; 202 } 203 return Error::success(); 204 } 205 206 void clear() { 207 Buckets.resize(8); 208 Present.clear(); 209 Deleted.clear(); 210 } 211 212 bool empty() const { return size() == 0; } 213 uint32_t capacity() const { return Buckets.size(); } 214 uint32_t size() const { return Present.count(); } 215 216 const_iterator begin() const { return const_iterator(*this); } 217 const_iterator end() const { return const_iterator(*this, 0, true); } 218 219 /// Find the entry whose key has the specified hash value, using the specified 220 /// traits defining hash function and equality. 221 template <typename Key, typename TraitsT> 222 const_iterator find_as(const Key &K, TraitsT &Traits) const { 223 uint32_t H = Traits.hashLookupKey(K) % capacity(); 224 uint32_t I = H; 225 Optional<uint32_t> FirstUnused; 226 do { 227 if (isPresent(I)) { 228 if (Traits.storageKeyToLookupKey(Buckets[I].first) == K) 229 return const_iterator(*this, I, false); 230 } else { 231 if (!FirstUnused) 232 FirstUnused = I; 233 // Insertion occurs via linear probing from the slot hint, and will be 234 // inserted at the first empty / deleted location. Therefore, if we are 235 // probing and find a location that is neither present nor deleted, then 236 // nothing must have EVER been inserted at this location, and thus it is 237 // not possible for a matching value to occur later. 238 if (!isDeleted(I)) 239 break; 240 } 241 I = (I + 1) % capacity(); 242 } while (I != H); 243 244 // The only way FirstUnused would not be set is if every single entry in the 245 // table were Present. But this would violate the load factor constraints 246 // that we impose, so it should never happen. 247 assert(FirstUnused); 248 return const_iterator(*this, *FirstUnused, true); 249 } 250 251 /// Set the entry using a key type that the specified Traits can convert 252 /// from a real key to an internal key. 253 template <typename Key, typename TraitsT> 254 bool set_as(const Key &K, ValueT V, TraitsT &Traits) { 255 return set_as_internal(K, std::move(V), Traits, None); 256 } 257 258 template <typename Key, typename TraitsT> 259 ValueT get(const Key &K, TraitsT &Traits) const { 260 auto Iter = find_as(K, Traits); 261 assert(Iter != end()); 262 return (*Iter).second; 263 } 264 265protected: 266 bool isPresent(uint32_t K) const { return Present.test(K); } 267 bool isDeleted(uint32_t K) const { return Deleted.test(K); } 268 269 BucketList Buckets; 270 mutable SparseBitVector<> Present; 271 mutable SparseBitVector<> Deleted; 272 273private: 274 /// Set the entry using a key type that the specified Traits can convert 275 /// from a real key to an internal key. 276 template <typename Key, typename TraitsT> 277 bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits, 278 Optional<uint32_t> InternalKey) { 279 auto Entry = find_as(K, Traits); 280 if (Entry != end()) { 281 assert(isPresent(Entry.index())); 282 assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K); 283 // We're updating, no need to do anything special. 284 Buckets[Entry.index()].second = V; 285 return false; 286 } 287 288 auto &B = Buckets[Entry.index()]; 289 assert(!isPresent(Entry.index())); 290 assert(Entry.isEnd()); 291 B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K); 292 B.second = V; 293 Present.set(Entry.index()); 294 Deleted.reset(Entry.index()); 295 296 grow(Traits); 297 298 assert((find_as(K, Traits)) != end()); 299 return true; 300 } 301 302 static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; } 303 304 template <typename TraitsT> 305 void grow(TraitsT &Traits) { 306 uint32_t S = size(); 307 uint32_t MaxLoad = maxLoad(capacity()); 308 if (S < maxLoad(capacity())) 309 return; 310 assert(capacity() != UINT32_MAX && "Can't grow Hash table!"); 311 312 uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX; 313 314 // Growing requires rebuilding the table and re-hashing every item. Make a 315 // copy with a larger capacity, insert everything into the copy, then swap 316 // it in. 317 HashTable NewMap(NewCapacity); 318 for (auto I : Present) { 319 auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first); 320 NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits, 321 Buckets[I].first); 322 } 323 324 Buckets.swap(NewMap.Buckets); 325 std::swap(Present, NewMap.Present); 326 std::swap(Deleted, NewMap.Deleted); 327 assert(capacity() == NewCapacity); 328 assert(size() == S); 329 } 330}; 331 332} // end namespace pdb 333 334} // end namespace llvm 335 336#endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H 337