HashTable.h revision 353358
1//===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9#ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H 10#define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H 11 12#include "llvm/ADT/SparseBitVector.h" 13#include "llvm/ADT/iterator.h" 14#include "llvm/DebugInfo/PDB/Native/RawError.h" 15#include "llvm/Support/BinaryStreamReader.h" 16#include "llvm/Support/BinaryStreamWriter.h" 17#include "llvm/Support/Endian.h" 18#include "llvm/Support/Error.h" 19#include <cstdint> 20#include <iterator> 21#include <utility> 22#include <vector> 23 24namespace llvm { 25 26class BinaryStreamReader; 27class BinaryStreamWriter; 28 29namespace pdb { 30 31Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V); 32Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec); 33 34template <typename ValueT> class HashTable; 35 36template <typename ValueT> 37class HashTableIterator 38 : public iterator_facade_base<HashTableIterator<ValueT>, 39 std::forward_iterator_tag, 40 const std::pair<uint32_t, ValueT>> { 41 friend HashTable<ValueT>; 42 43 HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index, 44 bool IsEnd) 45 : Map(&Map), Index(Index), IsEnd(IsEnd) {} 46 47public: 48 HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) { 49 int I = Map.Present.find_first(); 50 if (I == -1) { 51 Index = 0; 52 IsEnd = true; 53 } else { 54 Index = static_cast<uint32_t>(I); 55 IsEnd = false; 56 } 57 } 58 59 HashTableIterator &operator=(const HashTableIterator &R) { 60 Map = R.Map; 61 return *this; 62 } 63 bool operator==(const HashTableIterator &R) const { 64 if (IsEnd && R.IsEnd) 65 return true; 66 if (IsEnd != R.IsEnd) 67 return false; 68 69 return (Map == R.Map) && (Index == R.Index); 70 } 71 const std::pair<uint32_t, ValueT> &operator*() const { 72 assert(Map->Present.test(Index)); 73 return Map->Buckets[Index]; 74 } 75 76 // Implement postfix op++ in terms of prefix op++ by using the superclass 77 // implementation. 78 using iterator_facade_base<HashTableIterator<ValueT>, 79 std::forward_iterator_tag, 80 const std::pair<uint32_t, ValueT>>::operator++; 81 HashTableIterator &operator++() { 82 while (Index < Map->Buckets.size()) { 83 ++Index; 84 if (Map->Present.test(Index)) 85 return *this; 86 } 87 88 IsEnd = true; 89 return *this; 90 } 91 92private: 93 bool isEnd() const { return IsEnd; } 94 uint32_t index() const { return Index; } 95 96 const HashTable<ValueT> *Map; 97 uint32_t Index; 98 bool IsEnd; 99}; 100 101template <typename ValueT> 102class HashTable { 103 struct Header { 104 support::ulittle32_t Size; 105 support::ulittle32_t Capacity; 106 }; 107 108 using BucketList = std::vector<std::pair<uint32_t, ValueT>>; 109 110public: 111 using const_iterator = HashTableIterator<ValueT>; 112 friend const_iterator; 113 114 HashTable() { Buckets.resize(8); } 115 explicit HashTable(uint32_t Capacity) { 116 Buckets.resize(Capacity); 117 } 118 119 Error load(BinaryStreamReader &Stream) { 120 const Header *H; 121 if (auto EC = Stream.readObject(H)) 122 return EC; 123 if (H->Capacity == 0) 124 return make_error<RawError>(raw_error_code::corrupt_file, 125 "Invalid Hash Table Capacity"); 126 if (H->Size > maxLoad(H->Capacity)) 127 return make_error<RawError>(raw_error_code::corrupt_file, 128 "Invalid Hash Table Size"); 129 130 Buckets.resize(H->Capacity); 131 132 if (auto EC = readSparseBitVector(Stream, Present)) 133 return EC; 134 if (Present.count() != H->Size) 135 return make_error<RawError>(raw_error_code::corrupt_file, 136 "Present bit vector does not match size!"); 137 138 if (auto EC = readSparseBitVector(Stream, Deleted)) 139 return EC; 140 if (Present.intersects(Deleted)) 141 return make_error<RawError>(raw_error_code::corrupt_file, 142 "Present bit vector intersects deleted!"); 143 144 for (uint32_t P : Present) { 145 if (auto EC = Stream.readInteger(Buckets[P].first)) 146 return EC; 147 const ValueT *Value; 148 if (auto EC = Stream.readObject(Value)) 149 return EC; 150 Buckets[P].second = *Value; 151 } 152 153 return Error::success(); 154 } 155 156 uint32_t calculateSerializedLength() const { 157 uint32_t Size = sizeof(Header); 158 159 constexpr int BitsPerWord = 8 * sizeof(uint32_t); 160 161 int NumBitsP = Present.find_last() + 1; 162 int NumBitsD = Deleted.find_last() + 1; 163 164 uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord; 165 uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord; 166 167 // Present bit set number of words (4 bytes), followed by that many actual 168 // words (4 bytes each). 169 Size += sizeof(uint32_t); 170 Size += NumWordsP * sizeof(uint32_t); 171 172 // Deleted bit set number of words (4 bytes), followed by that many actual 173 // words (4 bytes each). 174 Size += sizeof(uint32_t); 175 Size += NumWordsD * sizeof(uint32_t); 176 177 // One (Key, ValueT) pair for each entry Present. 178 Size += (sizeof(uint32_t) + sizeof(ValueT)) * size(); 179 180 return Size; 181 } 182 183 Error commit(BinaryStreamWriter &Writer) const { 184 Header H; 185 H.Size = size(); 186 H.Capacity = capacity(); 187 if (auto EC = Writer.writeObject(H)) 188 return EC; 189 190 if (auto EC = writeSparseBitVector(Writer, Present)) 191 return EC; 192 193 if (auto EC = writeSparseBitVector(Writer, Deleted)) 194 return EC; 195 196 for (const auto &Entry : *this) { 197 if (auto EC = Writer.writeInteger(Entry.first)) 198 return EC; 199 if (auto EC = Writer.writeObject(Entry.second)) 200 return EC; 201 } 202 return Error::success(); 203 } 204 205 void clear() { 206 Buckets.resize(8); 207 Present.clear(); 208 Deleted.clear(); 209 } 210 211 bool empty() const { return size() == 0; } 212 uint32_t capacity() const { return Buckets.size(); } 213 uint32_t size() const { return Present.count(); } 214 215 const_iterator begin() const { return const_iterator(*this); } 216 const_iterator end() const { return const_iterator(*this, 0, true); } 217 218 /// Find the entry whose key has the specified hash value, using the specified 219 /// traits defining hash function and equality. 220 template <typename Key, typename TraitsT> 221 const_iterator find_as(const Key &K, TraitsT &Traits) const { 222 uint32_t H = Traits.hashLookupKey(K) % capacity(); 223 uint32_t I = H; 224 Optional<uint32_t> FirstUnused; 225 do { 226 if (isPresent(I)) { 227 if (Traits.storageKeyToLookupKey(Buckets[I].first) == K) 228 return const_iterator(*this, I, false); 229 } else { 230 if (!FirstUnused) 231 FirstUnused = I; 232 // Insertion occurs via linear probing from the slot hint, and will be 233 // inserted at the first empty / deleted location. Therefore, if we are 234 // probing and find a location that is neither present nor deleted, then 235 // nothing must have EVER been inserted at this location, and thus it is 236 // not possible for a matching value to occur later. 237 if (!isDeleted(I)) 238 break; 239 } 240 I = (I + 1) % capacity(); 241 } while (I != H); 242 243 // The only way FirstUnused would not be set is if every single entry in the 244 // table were Present. But this would violate the load factor constraints 245 // that we impose, so it should never happen. 246 assert(FirstUnused); 247 return const_iterator(*this, *FirstUnused, true); 248 } 249 250 /// Set the entry using a key type that the specified Traits can convert 251 /// from a real key to an internal key. 252 template <typename Key, typename TraitsT> 253 bool set_as(const Key &K, ValueT V, TraitsT &Traits) { 254 return set_as_internal(K, std::move(V), Traits, None); 255 } 256 257 template <typename Key, typename TraitsT> 258 ValueT get(const Key &K, TraitsT &Traits) const { 259 auto Iter = find_as(K, Traits); 260 assert(Iter != end()); 261 return (*Iter).second; 262 } 263 264protected: 265 bool isPresent(uint32_t K) const { return Present.test(K); } 266 bool isDeleted(uint32_t K) const { return Deleted.test(K); } 267 268 BucketList Buckets; 269 mutable SparseBitVector<> Present; 270 mutable SparseBitVector<> Deleted; 271 272private: 273 /// Set the entry using a key type that the specified Traits can convert 274 /// from a real key to an internal key. 275 template <typename Key, typename TraitsT> 276 bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits, 277 Optional<uint32_t> InternalKey) { 278 auto Entry = find_as(K, Traits); 279 if (Entry != end()) { 280 assert(isPresent(Entry.index())); 281 assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K); 282 // We're updating, no need to do anything special. 283 Buckets[Entry.index()].second = V; 284 return false; 285 } 286 287 auto &B = Buckets[Entry.index()]; 288 assert(!isPresent(Entry.index())); 289 assert(Entry.isEnd()); 290 B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K); 291 B.second = V; 292 Present.set(Entry.index()); 293 Deleted.reset(Entry.index()); 294 295 grow(Traits); 296 297 assert((find_as(K, Traits)) != end()); 298 return true; 299 } 300 301 static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; } 302 303 template <typename TraitsT> 304 void grow(TraitsT &Traits) { 305 uint32_t S = size(); 306 uint32_t MaxLoad = maxLoad(capacity()); 307 if (S < maxLoad(capacity())) 308 return; 309 assert(capacity() != UINT32_MAX && "Can't grow Hash table!"); 310 311 uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX; 312 313 // Growing requires rebuilding the table and re-hashing every item. Make a 314 // copy with a larger capacity, insert everything into the copy, then swap 315 // it in. 316 HashTable NewMap(NewCapacity); 317 for (auto I : Present) { 318 auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first); 319 NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits, 320 Buckets[I].first); 321 } 322 323 Buckets.swap(NewMap.Buckets); 324 std::swap(Present, NewMap.Present); 325 std::swap(Deleted, NewMap.Deleted); 326 assert(capacity() == NewCapacity); 327 assert(size() == S); 328 } 329}; 330 331} // end namespace pdb 332 333} // end namespace llvm 334 335#endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H 336