1317017Sdim//===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
2317017Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6317017Sdim//
7317017Sdim//===----------------------------------------------------------------------===//
8317017Sdim
9320572Sdim#ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
10320572Sdim#define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
11317017Sdim
12317017Sdim#include "llvm/ADT/SparseBitVector.h"
13317017Sdim#include "llvm/ADT/iterator.h"
14341825Sdim#include "llvm/DebugInfo/PDB/Native/RawError.h"
15341825Sdim#include "llvm/Support/BinaryStreamReader.h"
16341825Sdim#include "llvm/Support/BinaryStreamWriter.h"
17317017Sdim#include "llvm/Support/Endian.h"
18317017Sdim#include "llvm/Support/Error.h"
19317017Sdim#include <cstdint>
20320572Sdim#include <iterator>
21317017Sdim#include <utility>
22320572Sdim#include <vector>
23317017Sdim
24317017Sdimnamespace llvm {
25320572Sdim
26320572Sdimclass BinaryStreamReader;
27320572Sdimclass BinaryStreamWriter;
28320572Sdim
29317017Sdimnamespace pdb {
30317017Sdim
31341825SdimError readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
32341825SdimError writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec);
33317017Sdim
34353358Sdimtemplate <typename ValueT> class HashTable;
35341825Sdim
36353358Sdimtemplate <typename ValueT>
37341825Sdimclass HashTableIterator
38353358Sdim    : public iterator_facade_base<HashTableIterator<ValueT>,
39341825Sdim                                  std::forward_iterator_tag,
40353358Sdim                                  const std::pair<uint32_t, ValueT>> {
41353358Sdim  friend HashTable<ValueT>;
42341825Sdim
43353358Sdim  HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index,
44341825Sdim                    bool IsEnd)
45341825Sdim      : Map(&Map), Index(Index), IsEnd(IsEnd) {}
46341825Sdim
47341825Sdimpublic:
48353358Sdim  HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) {
49341825Sdim    int I = Map.Present.find_first();
50341825Sdim    if (I == -1) {
51341825Sdim      Index = 0;
52341825Sdim      IsEnd = true;
53341825Sdim    } else {
54341825Sdim      Index = static_cast<uint32_t>(I);
55341825Sdim      IsEnd = false;
56341825Sdim    }
57341825Sdim  }
58341825Sdim
59360784Sdim  HashTableIterator(const HashTableIterator &R) = default;
60341825Sdim  HashTableIterator &operator=(const HashTableIterator &R) {
61341825Sdim    Map = R.Map;
62341825Sdim    return *this;
63341825Sdim  }
64341825Sdim  bool operator==(const HashTableIterator &R) const {
65341825Sdim    if (IsEnd && R.IsEnd)
66341825Sdim      return true;
67341825Sdim    if (IsEnd != R.IsEnd)
68341825Sdim      return false;
69341825Sdim
70341825Sdim    return (Map == R.Map) && (Index == R.Index);
71341825Sdim  }
72341825Sdim  const std::pair<uint32_t, ValueT> &operator*() const {
73341825Sdim    assert(Map->Present.test(Index));
74341825Sdim    return Map->Buckets[Index];
75341825Sdim  }
76353358Sdim
77353358Sdim  // Implement postfix op++ in terms of prefix op++ by using the superclass
78353358Sdim  // implementation.
79353358Sdim  using iterator_facade_base<HashTableIterator<ValueT>,
80353358Sdim                             std::forward_iterator_tag,
81353358Sdim                             const std::pair<uint32_t, ValueT>>::operator++;
82341825Sdim  HashTableIterator &operator++() {
83341825Sdim    while (Index < Map->Buckets.size()) {
84341825Sdim      ++Index;
85341825Sdim      if (Map->Present.test(Index))
86341825Sdim        return *this;
87341825Sdim    }
88341825Sdim
89341825Sdim    IsEnd = true;
90341825Sdim    return *this;
91341825Sdim  }
92341825Sdim
93341825Sdimprivate:
94341825Sdim  bool isEnd() const { return IsEnd; }
95341825Sdim  uint32_t index() const { return Index; }
96341825Sdim
97353358Sdim  const HashTable<ValueT> *Map;
98341825Sdim  uint32_t Index;
99341825Sdim  bool IsEnd;
100341825Sdim};
101341825Sdim
102353358Sdimtemplate <typename ValueT>
103317017Sdimclass HashTable {
104317017Sdim  struct Header {
105317017Sdim    support::ulittle32_t Size;
106317017Sdim    support::ulittle32_t Capacity;
107317017Sdim  };
108317017Sdim
109341825Sdim  using BucketList = std::vector<std::pair<uint32_t, ValueT>>;
110317017Sdim
111317017Sdimpublic:
112353358Sdim  using const_iterator = HashTableIterator<ValueT>;
113353358Sdim  friend const_iterator;
114353358Sdim
115341825Sdim  HashTable() { Buckets.resize(8); }
116353358Sdim  explicit HashTable(uint32_t Capacity) {
117341825Sdim    Buckets.resize(Capacity);
118341825Sdim  }
119317017Sdim
120341825Sdim  Error load(BinaryStreamReader &Stream) {
121341825Sdim    const Header *H;
122341825Sdim    if (auto EC = Stream.readObject(H))
123341825Sdim      return EC;
124341825Sdim    if (H->Capacity == 0)
125341825Sdim      return make_error<RawError>(raw_error_code::corrupt_file,
126341825Sdim                                  "Invalid Hash Table Capacity");
127341825Sdim    if (H->Size > maxLoad(H->Capacity))
128341825Sdim      return make_error<RawError>(raw_error_code::corrupt_file,
129341825Sdim                                  "Invalid Hash Table Size");
130317017Sdim
131341825Sdim    Buckets.resize(H->Capacity);
132317017Sdim
133341825Sdim    if (auto EC = readSparseBitVector(Stream, Present))
134341825Sdim      return EC;
135341825Sdim    if (Present.count() != H->Size)
136341825Sdim      return make_error<RawError>(raw_error_code::corrupt_file,
137341825Sdim                                  "Present bit vector does not match size!");
138317017Sdim
139341825Sdim    if (auto EC = readSparseBitVector(Stream, Deleted))
140341825Sdim      return EC;
141341825Sdim    if (Present.intersects(Deleted))
142341825Sdim      return make_error<RawError>(raw_error_code::corrupt_file,
143353358Sdim                                  "Present bit vector intersects deleted!");
144317017Sdim
145341825Sdim    for (uint32_t P : Present) {
146341825Sdim      if (auto EC = Stream.readInteger(Buckets[P].first))
147341825Sdim        return EC;
148341825Sdim      const ValueT *Value;
149341825Sdim      if (auto EC = Stream.readObject(Value))
150341825Sdim        return EC;
151341825Sdim      Buckets[P].second = *Value;
152341825Sdim    }
153317017Sdim
154341825Sdim    return Error::success();
155341825Sdim  }
156341825Sdim
157341825Sdim  uint32_t calculateSerializedLength() const {
158341825Sdim    uint32_t Size = sizeof(Header);
159341825Sdim
160341825Sdim    constexpr int BitsPerWord = 8 * sizeof(uint32_t);
161341825Sdim
162341825Sdim    int NumBitsP = Present.find_last() + 1;
163341825Sdim    int NumBitsD = Deleted.find_last() + 1;
164341825Sdim
165341825Sdim    uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord;
166341825Sdim    uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord;
167341825Sdim
168341825Sdim    // Present bit set number of words (4 bytes), followed by that many actual
169341825Sdim    // words (4 bytes each).
170341825Sdim    Size += sizeof(uint32_t);
171341825Sdim    Size += NumWordsP * sizeof(uint32_t);
172341825Sdim
173341825Sdim    // Deleted bit set number of words (4 bytes), followed by that many actual
174341825Sdim    // words (4 bytes each).
175341825Sdim    Size += sizeof(uint32_t);
176341825Sdim    Size += NumWordsD * sizeof(uint32_t);
177341825Sdim
178341825Sdim    // One (Key, ValueT) pair for each entry Present.
179341825Sdim    Size += (sizeof(uint32_t) + sizeof(ValueT)) * size();
180341825Sdim
181341825Sdim    return Size;
182341825Sdim  }
183341825Sdim
184341825Sdim  Error commit(BinaryStreamWriter &Writer) const {
185341825Sdim    Header H;
186341825Sdim    H.Size = size();
187341825Sdim    H.Capacity = capacity();
188341825Sdim    if (auto EC = Writer.writeObject(H))
189341825Sdim      return EC;
190341825Sdim
191341825Sdim    if (auto EC = writeSparseBitVector(Writer, Present))
192341825Sdim      return EC;
193341825Sdim
194341825Sdim    if (auto EC = writeSparseBitVector(Writer, Deleted))
195341825Sdim      return EC;
196341825Sdim
197341825Sdim    for (const auto &Entry : *this) {
198341825Sdim      if (auto EC = Writer.writeInteger(Entry.first))
199341825Sdim        return EC;
200341825Sdim      if (auto EC = Writer.writeObject(Entry.second))
201341825Sdim        return EC;
202341825Sdim    }
203341825Sdim    return Error::success();
204341825Sdim  }
205341825Sdim
206341825Sdim  void clear() {
207341825Sdim    Buckets.resize(8);
208341825Sdim    Present.clear();
209341825Sdim    Deleted.clear();
210341825Sdim  }
211341825Sdim
212341825Sdim  bool empty() const { return size() == 0; }
213341825Sdim  uint32_t capacity() const { return Buckets.size(); }
214341825Sdim  uint32_t size() const { return Present.count(); }
215341825Sdim
216353358Sdim  const_iterator begin() const { return const_iterator(*this); }
217353358Sdim  const_iterator end() const { return const_iterator(*this, 0, true); }
218341825Sdim
219341825Sdim  /// Find the entry whose key has the specified hash value, using the specified
220341825Sdim  /// traits defining hash function and equality.
221353358Sdim  template <typename Key, typename TraitsT>
222353358Sdim  const_iterator find_as(const Key &K, TraitsT &Traits) const {
223341825Sdim    uint32_t H = Traits.hashLookupKey(K) % capacity();
224341825Sdim    uint32_t I = H;
225341825Sdim    Optional<uint32_t> FirstUnused;
226341825Sdim    do {
227341825Sdim      if (isPresent(I)) {
228341825Sdim        if (Traits.storageKeyToLookupKey(Buckets[I].first) == K)
229353358Sdim          return const_iterator(*this, I, false);
230341825Sdim      } else {
231341825Sdim        if (!FirstUnused)
232341825Sdim          FirstUnused = I;
233341825Sdim        // Insertion occurs via linear probing from the slot hint, and will be
234341825Sdim        // inserted at the first empty / deleted location.  Therefore, if we are
235341825Sdim        // probing and find a location that is neither present nor deleted, then
236341825Sdim        // nothing must have EVER been inserted at this location, and thus it is
237341825Sdim        // not possible for a matching value to occur later.
238341825Sdim        if (!isDeleted(I))
239341825Sdim          break;
240341825Sdim      }
241341825Sdim      I = (I + 1) % capacity();
242341825Sdim    } while (I != H);
243341825Sdim
244341825Sdim    // The only way FirstUnused would not be set is if every single entry in the
245341825Sdim    // table were Present.  But this would violate the load factor constraints
246341825Sdim    // that we impose, so it should never happen.
247341825Sdim    assert(FirstUnused);
248353358Sdim    return const_iterator(*this, *FirstUnused, true);
249341825Sdim  }
250341825Sdim
251341825Sdim  /// Set the entry using a key type that the specified Traits can convert
252341825Sdim  /// from a real key to an internal key.
253353358Sdim  template <typename Key, typename TraitsT>
254353358Sdim  bool set_as(const Key &K, ValueT V, TraitsT &Traits) {
255353358Sdim    return set_as_internal(K, std::move(V), Traits, None);
256341825Sdim  }
257341825Sdim
258353358Sdim  template <typename Key, typename TraitsT>
259353358Sdim  ValueT get(const Key &K, TraitsT &Traits) const {
260353358Sdim    auto Iter = find_as(K, Traits);
261341825Sdim    assert(Iter != end());
262341825Sdim    return (*Iter).second;
263341825Sdim  }
264341825Sdim
265317017Sdimprotected:
266317017Sdim  bool isPresent(uint32_t K) const { return Present.test(K); }
267317017Sdim  bool isDeleted(uint32_t K) const { return Deleted.test(K); }
268320572Sdim
269317017Sdim  BucketList Buckets;
270317017Sdim  mutable SparseBitVector<> Present;
271317017Sdim  mutable SparseBitVector<> Deleted;
272317017Sdim
273317017Sdimprivate:
274341825Sdim  /// Set the entry using a key type that the specified Traits can convert
275341825Sdim  /// from a real key to an internal key.
276353358Sdim  template <typename Key, typename TraitsT>
277353358Sdim  bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits,
278353358Sdim                       Optional<uint32_t> InternalKey) {
279353358Sdim    auto Entry = find_as(K, Traits);
280341825Sdim    if (Entry != end()) {
281341825Sdim      assert(isPresent(Entry.index()));
282341825Sdim      assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K);
283341825Sdim      // We're updating, no need to do anything special.
284341825Sdim      Buckets[Entry.index()].second = V;
285341825Sdim      return false;
286341825Sdim    }
287317017Sdim
288341825Sdim    auto &B = Buckets[Entry.index()];
289341825Sdim    assert(!isPresent(Entry.index()));
290341825Sdim    assert(Entry.isEnd());
291341825Sdim    B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K);
292341825Sdim    B.second = V;
293341825Sdim    Present.set(Entry.index());
294341825Sdim    Deleted.reset(Entry.index());
295317017Sdim
296353358Sdim    grow(Traits);
297320572Sdim
298353358Sdim    assert((find_as(K, Traits)) != end());
299341825Sdim    return true;
300341825Sdim  }
301317017Sdim
302341825Sdim  static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
303317017Sdim
304353358Sdim  template <typename TraitsT>
305353358Sdim  void grow(TraitsT &Traits) {
306341825Sdim    uint32_t S = size();
307341825Sdim    uint32_t MaxLoad = maxLoad(capacity());
308341825Sdim    if (S < maxLoad(capacity()))
309341825Sdim      return;
310341825Sdim    assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
311317017Sdim
312341825Sdim    uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX;
313317017Sdim
314341825Sdim    // Growing requires rebuilding the table and re-hashing every item.  Make a
315341825Sdim    // copy with a larger capacity, insert everything into the copy, then swap
316341825Sdim    // it in.
317353358Sdim    HashTable NewMap(NewCapacity);
318341825Sdim    for (auto I : Present) {
319341825Sdim      auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first);
320353358Sdim      NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits,
321353358Sdim                             Buckets[I].first);
322341825Sdim    }
323341825Sdim
324341825Sdim    Buckets.swap(NewMap.Buckets);
325341825Sdim    std::swap(Present, NewMap.Present);
326341825Sdim    std::swap(Deleted, NewMap.Deleted);
327341825Sdim    assert(capacity() == NewCapacity);
328341825Sdim    assert(size() == S);
329341825Sdim  }
330317017Sdim};
331317017Sdim
332317017Sdim} // end namespace pdb
333320572Sdim
334317017Sdim} // end namespace llvm
335317017Sdim
336320572Sdim#endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
337