1//===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
10#define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
11
12#include "llvm/ADT/SparseBitVector.h"
13#include "llvm/ADT/iterator.h"
14#include "llvm/DebugInfo/PDB/Native/RawError.h"
15#include "llvm/Support/BinaryStreamReader.h"
16#include "llvm/Support/BinaryStreamWriter.h"
17#include "llvm/Support/Endian.h"
18#include "llvm/Support/Error.h"
19#include <cstdint>
20#include <iterator>
21#include <utility>
22#include <vector>
23
24namespace llvm {
25
26class BinaryStreamReader;
27class BinaryStreamWriter;
28
29namespace pdb {
30
31Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
32Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec);
33
34template <typename ValueT> class HashTable;
35
36template <typename ValueT>
37class HashTableIterator
38    : public iterator_facade_base<HashTableIterator<ValueT>,
39                                  std::forward_iterator_tag,
40                                  const std::pair<uint32_t, ValueT>> {
41  friend HashTable<ValueT>;
42
43  HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index,
44                    bool IsEnd)
45      : Map(&Map), Index(Index), IsEnd(IsEnd) {}
46
47public:
48  HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) {
49    int I = Map.Present.find_first();
50    if (I == -1) {
51      Index = 0;
52      IsEnd = true;
53    } else {
54      Index = static_cast<uint32_t>(I);
55      IsEnd = false;
56    }
57  }
58
59  HashTableIterator(const HashTableIterator &R) = default;
60  HashTableIterator &operator=(const HashTableIterator &R) {
61    Map = R.Map;
62    return *this;
63  }
64  bool operator==(const HashTableIterator &R) const {
65    if (IsEnd && R.IsEnd)
66      return true;
67    if (IsEnd != R.IsEnd)
68      return false;
69
70    return (Map == R.Map) && (Index == R.Index);
71  }
72  const std::pair<uint32_t, ValueT> &operator*() const {
73    assert(Map->Present.test(Index));
74    return Map->Buckets[Index];
75  }
76
77  // Implement postfix op++ in terms of prefix op++ by using the superclass
78  // implementation.
79  using iterator_facade_base<HashTableIterator<ValueT>,
80                             std::forward_iterator_tag,
81                             const std::pair<uint32_t, ValueT>>::operator++;
82  HashTableIterator &operator++() {
83    while (Index < Map->Buckets.size()) {
84      ++Index;
85      if (Map->Present.test(Index))
86        return *this;
87    }
88
89    IsEnd = true;
90    return *this;
91  }
92
93private:
94  bool isEnd() const { return IsEnd; }
95  uint32_t index() const { return Index; }
96
97  const HashTable<ValueT> *Map;
98  uint32_t Index;
99  bool IsEnd;
100};
101
102template <typename ValueT>
103class HashTable {
104  struct Header {
105    support::ulittle32_t Size;
106    support::ulittle32_t Capacity;
107  };
108
109  using BucketList = std::vector<std::pair<uint32_t, ValueT>>;
110
111public:
112  using const_iterator = HashTableIterator<ValueT>;
113  friend const_iterator;
114
115  HashTable() { Buckets.resize(8); }
116  explicit HashTable(uint32_t Capacity) {
117    Buckets.resize(Capacity);
118  }
119
120  Error load(BinaryStreamReader &Stream) {
121    const Header *H;
122    if (auto EC = Stream.readObject(H))
123      return EC;
124    if (H->Capacity == 0)
125      return make_error<RawError>(raw_error_code::corrupt_file,
126                                  "Invalid Hash Table Capacity");
127    if (H->Size > maxLoad(H->Capacity))
128      return make_error<RawError>(raw_error_code::corrupt_file,
129                                  "Invalid Hash Table Size");
130
131    Buckets.resize(H->Capacity);
132
133    if (auto EC = readSparseBitVector(Stream, Present))
134      return EC;
135    if (Present.count() != H->Size)
136      return make_error<RawError>(raw_error_code::corrupt_file,
137                                  "Present bit vector does not match size!");
138
139    if (auto EC = readSparseBitVector(Stream, Deleted))
140      return EC;
141    if (Present.intersects(Deleted))
142      return make_error<RawError>(raw_error_code::corrupt_file,
143                                  "Present bit vector intersects deleted!");
144
145    for (uint32_t P : Present) {
146      if (auto EC = Stream.readInteger(Buckets[P].first))
147        return EC;
148      const ValueT *Value;
149      if (auto EC = Stream.readObject(Value))
150        return EC;
151      Buckets[P].second = *Value;
152    }
153
154    return Error::success();
155  }
156
157  uint32_t calculateSerializedLength() const {
158    uint32_t Size = sizeof(Header);
159
160    constexpr int BitsPerWord = 8 * sizeof(uint32_t);
161
162    int NumBitsP = Present.find_last() + 1;
163    int NumBitsD = Deleted.find_last() + 1;
164
165    uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord;
166    uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord;
167
168    // Present bit set number of words (4 bytes), followed by that many actual
169    // words (4 bytes each).
170    Size += sizeof(uint32_t);
171    Size += NumWordsP * sizeof(uint32_t);
172
173    // Deleted bit set number of words (4 bytes), followed by that many actual
174    // words (4 bytes each).
175    Size += sizeof(uint32_t);
176    Size += NumWordsD * sizeof(uint32_t);
177
178    // One (Key, ValueT) pair for each entry Present.
179    Size += (sizeof(uint32_t) + sizeof(ValueT)) * size();
180
181    return Size;
182  }
183
184  Error commit(BinaryStreamWriter &Writer) const {
185    Header H;
186    H.Size = size();
187    H.Capacity = capacity();
188    if (auto EC = Writer.writeObject(H))
189      return EC;
190
191    if (auto EC = writeSparseBitVector(Writer, Present))
192      return EC;
193
194    if (auto EC = writeSparseBitVector(Writer, Deleted))
195      return EC;
196
197    for (const auto &Entry : *this) {
198      if (auto EC = Writer.writeInteger(Entry.first))
199        return EC;
200      if (auto EC = Writer.writeObject(Entry.second))
201        return EC;
202    }
203    return Error::success();
204  }
205
206  void clear() {
207    Buckets.resize(8);
208    Present.clear();
209    Deleted.clear();
210  }
211
212  bool empty() const { return size() == 0; }
213  uint32_t capacity() const { return Buckets.size(); }
214  uint32_t size() const { return Present.count(); }
215
216  const_iterator begin() const { return const_iterator(*this); }
217  const_iterator end() const { return const_iterator(*this, 0, true); }
218
219  /// Find the entry whose key has the specified hash value, using the specified
220  /// traits defining hash function and equality.
221  template <typename Key, typename TraitsT>
222  const_iterator find_as(const Key &K, TraitsT &Traits) const {
223    uint32_t H = Traits.hashLookupKey(K) % capacity();
224    uint32_t I = H;
225    Optional<uint32_t> FirstUnused;
226    do {
227      if (isPresent(I)) {
228        if (Traits.storageKeyToLookupKey(Buckets[I].first) == K)
229          return const_iterator(*this, I, false);
230      } else {
231        if (!FirstUnused)
232          FirstUnused = I;
233        // Insertion occurs via linear probing from the slot hint, and will be
234        // inserted at the first empty / deleted location.  Therefore, if we are
235        // probing and find a location that is neither present nor deleted, then
236        // nothing must have EVER been inserted at this location, and thus it is
237        // not possible for a matching value to occur later.
238        if (!isDeleted(I))
239          break;
240      }
241      I = (I + 1) % capacity();
242    } while (I != H);
243
244    // The only way FirstUnused would not be set is if every single entry in the
245    // table were Present.  But this would violate the load factor constraints
246    // that we impose, so it should never happen.
247    assert(FirstUnused);
248    return const_iterator(*this, *FirstUnused, true);
249  }
250
251  /// Set the entry using a key type that the specified Traits can convert
252  /// from a real key to an internal key.
253  template <typename Key, typename TraitsT>
254  bool set_as(const Key &K, ValueT V, TraitsT &Traits) {
255    return set_as_internal(K, std::move(V), Traits, None);
256  }
257
258  template <typename Key, typename TraitsT>
259  ValueT get(const Key &K, TraitsT &Traits) const {
260    auto Iter = find_as(K, Traits);
261    assert(Iter != end());
262    return (*Iter).second;
263  }
264
265protected:
266  bool isPresent(uint32_t K) const { return Present.test(K); }
267  bool isDeleted(uint32_t K) const { return Deleted.test(K); }
268
269  BucketList Buckets;
270  mutable SparseBitVector<> Present;
271  mutable SparseBitVector<> Deleted;
272
273private:
274  /// Set the entry using a key type that the specified Traits can convert
275  /// from a real key to an internal key.
276  template <typename Key, typename TraitsT>
277  bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits,
278                       Optional<uint32_t> InternalKey) {
279    auto Entry = find_as(K, Traits);
280    if (Entry != end()) {
281      assert(isPresent(Entry.index()));
282      assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K);
283      // We're updating, no need to do anything special.
284      Buckets[Entry.index()].second = V;
285      return false;
286    }
287
288    auto &B = Buckets[Entry.index()];
289    assert(!isPresent(Entry.index()));
290    assert(Entry.isEnd());
291    B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K);
292    B.second = V;
293    Present.set(Entry.index());
294    Deleted.reset(Entry.index());
295
296    grow(Traits);
297
298    assert((find_as(K, Traits)) != end());
299    return true;
300  }
301
302  static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
303
304  template <typename TraitsT>
305  void grow(TraitsT &Traits) {
306    uint32_t S = size();
307    uint32_t MaxLoad = maxLoad(capacity());
308    if (S < maxLoad(capacity()))
309      return;
310    assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
311
312    uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX;
313
314    // Growing requires rebuilding the table and re-hashing every item.  Make a
315    // copy with a larger capacity, insert everything into the copy, then swap
316    // it in.
317    HashTable NewMap(NewCapacity);
318    for (auto I : Present) {
319      auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first);
320      NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits,
321                             Buckets[I].first);
322    }
323
324    Buckets.swap(NewMap.Buckets);
325    std::swap(Present, NewMap.Present);
326    std::swap(Deleted, NewMap.Deleted);
327    assert(capacity() == NewCapacity);
328    assert(size() == S);
329  }
330};
331
332} // end namespace pdb
333
334} // end namespace llvm
335
336#endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
337