1//===-- MappedHash.h --------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLDB_CORE_MAPPEDHASH_H
10#define LLDB_CORE_MAPPEDHASH_H
11
12#include <cassert>
13#include <cstdint>
14
15#include <algorithm>
16#include <functional>
17#include <map>
18#include <vector>
19
20#include "lldb/Utility/DataExtractor.h"
21#include "lldb/Utility/Stream.h"
22#include "llvm/Support/DJB.h"
23
24class MappedHash {
25public:
26  enum HashFunctionType {
27    eHashFunctionDJB = 0u // Daniel J Bernstein hash function that is also used
28                          // by the ELF GNU_HASH sections
29  };
30
31  static uint32_t HashString(uint32_t hash_function, llvm::StringRef s) {
32    switch (hash_function) {
33    case MappedHash::eHashFunctionDJB:
34      return llvm::djbHash(s);
35
36    default:
37      break;
38    }
39    llvm_unreachable("Invalid hash function index");
40  }
41
42  static const uint32_t HASH_MAGIC = 0x48415348u;
43  static const uint32_t HASH_CIGAM = 0x48534148u;
44
45  template <typename T> struct Header {
46    typedef T HeaderData;
47
48    uint32_t
49        magic; // HASH_MAGIC or HASH_CIGAM magic value to allow endian detection
50    uint16_t version = 1; // Version number
51    uint16_t hash_function =
52        eHashFunctionDJB;      // The hash function enumeration that was used
53    uint32_t bucket_count = 0; // The number of buckets in this hash table
54    uint32_t hashes_count = 0; // The total number of unique hash values and
55                               // hash data offsets in this table
56    uint32_t header_data_len; // The size in bytes of the "header_data" template
57                              // member below
58    HeaderData header_data;   //
59
60    Header() : magic(HASH_MAGIC), header_data_len(sizeof(T)), header_data() {}
61
62    virtual ~Header() = default;
63
64    size_t GetByteSize() const {
65      return sizeof(magic) + sizeof(version) + sizeof(hash_function) +
66             sizeof(bucket_count) + sizeof(hashes_count) +
67             sizeof(header_data_len) + header_data_len;
68    }
69
70    virtual size_t GetByteSize(const HeaderData &header_data) = 0;
71
72    void SetHeaderDataByteSize(uint32_t header_data_byte_size) {
73      header_data_len = header_data_byte_size;
74    }
75
76    void Dump(lldb_private::Stream &s) {
77      s.Printf("header.magic              = 0x%8.8x\n", magic);
78      s.Printf("header.version            = 0x%4.4x\n", version);
79      s.Printf("header.hash_function      = 0x%4.4x\n", hash_function);
80      s.Printf("header.bucket_count       = 0x%8.8x %u\n", bucket_count,
81               bucket_count);
82      s.Printf("header.hashes_count       = 0x%8.8x %u\n", hashes_count,
83               hashes_count);
84      s.Printf("header.header_data_len    = 0x%8.8x %u\n", header_data_len,
85               header_data_len);
86    }
87
88    virtual lldb::offset_t Read(lldb_private::DataExtractor &data,
89                                lldb::offset_t offset) {
90      if (data.ValidOffsetForDataOfSize(
91              offset, sizeof(magic) + sizeof(version) + sizeof(hash_function) +
92                          sizeof(bucket_count) + sizeof(hashes_count) +
93                          sizeof(header_data_len))) {
94        magic = data.GetU32(&offset);
95        if (magic != HASH_MAGIC) {
96          if (magic == HASH_CIGAM) {
97            switch (data.GetByteOrder()) {
98            case lldb::eByteOrderBig:
99              data.SetByteOrder(lldb::eByteOrderLittle);
100              break;
101            case lldb::eByteOrderLittle:
102              data.SetByteOrder(lldb::eByteOrderBig);
103              break;
104            default:
105              return LLDB_INVALID_OFFSET;
106            }
107          } else {
108            // Magic bytes didn't match
109            version = 0;
110            return LLDB_INVALID_OFFSET;
111          }
112        }
113
114        version = data.GetU16(&offset);
115        if (version != 1) {
116          // Unsupported version
117          return LLDB_INVALID_OFFSET;
118        }
119        hash_function = data.GetU16(&offset);
120        if (hash_function == 4)
121          hash_function = 0; // Deal with pre-release version of this table...
122        bucket_count = data.GetU32(&offset);
123        hashes_count = data.GetU32(&offset);
124        header_data_len = data.GetU32(&offset);
125        return offset;
126      }
127      return LLDB_INVALID_OFFSET;
128    }
129    //
130    //        // Returns a buffer that contains a serialized version of this
131    //        table
132    //        // that must be freed with free().
133    //        virtual void *
134    //        Write (int fd);
135  };
136
137  // A class for reading and using a saved hash table from a block of data
138  // in memory
139  template <typename __KeyType, class __HeaderType, class __HashData>
140  class MemoryTable {
141  public:
142    typedef __HeaderType HeaderType;
143    typedef __KeyType KeyType;
144    typedef __HashData HashData;
145
146    enum Result {
147      eResultKeyMatch = 0u, // The entry was found, key matched and "pair" was
148                            // filled in successfully
149      eResultKeyMismatch =
150          1u, // Bucket hash data collision, but key didn't match
151      eResultEndOfHashData = 2u, // The chain of items for this hash data in
152                                 // this bucket is terminated, search no more
153      eResultError = 3u          // Status parsing the hash data, abort
154    };
155
156    struct Pair {
157      KeyType key;
158      HashData value;
159    };
160
161    MemoryTable(lldb_private::DataExtractor &data)
162        : m_header(), m_hash_indexes(nullptr), m_hash_values(nullptr),
163          m_hash_offsets(nullptr) {
164      lldb::offset_t offset = m_header.Read(data, 0);
165      if (offset != LLDB_INVALID_OFFSET && IsValid()) {
166        m_hash_indexes = (const uint32_t *)data.GetData(
167            &offset, m_header.bucket_count * sizeof(uint32_t));
168        m_hash_values = (const uint32_t *)data.GetData(
169            &offset, m_header.hashes_count * sizeof(uint32_t));
170        m_hash_offsets = (const uint32_t *)data.GetData(
171            &offset, m_header.hashes_count * sizeof(uint32_t));
172      }
173    }
174
175    virtual ~MemoryTable() = default;
176
177    bool IsValid() const {
178      return m_header.version == 1 &&
179             m_header.hash_function == eHashFunctionDJB &&
180             m_header.bucket_count > 0;
181    }
182
183    uint32_t GetHashIndex(uint32_t bucket_idx) const {
184      uint32_t result = UINT32_MAX;
185      if (m_hash_indexes && bucket_idx < m_header.bucket_count)
186        memcpy(&result, m_hash_indexes + bucket_idx, sizeof(uint32_t));
187      return result;
188    }
189
190    uint32_t GetHashValue(uint32_t hash_idx) const {
191      uint32_t result = UINT32_MAX;
192      if (m_hash_values && hash_idx < m_header.hashes_count)
193        memcpy(&result, m_hash_values + hash_idx, sizeof(uint32_t));
194      return result;
195    }
196
197    uint32_t GetHashDataOffset(uint32_t hash_idx) const {
198      uint32_t result = UINT32_MAX;
199      if (m_hash_offsets && hash_idx < m_header.hashes_count)
200        memcpy(&result, m_hash_offsets + hash_idx, sizeof(uint32_t));
201      return result;
202    }
203
204    bool Find(llvm::StringRef name, Pair &pair) const {
205      if (name.empty())
206        return false;
207
208      if (IsValid()) {
209        const uint32_t bucket_count = m_header.bucket_count;
210        const uint32_t hash_count = m_header.hashes_count;
211        const uint32_t hash_value =
212            MappedHash::HashString(m_header.hash_function, name);
213        const uint32_t bucket_idx = hash_value % bucket_count;
214        uint32_t hash_idx = GetHashIndex(bucket_idx);
215        if (hash_idx < hash_count) {
216          for (; hash_idx < hash_count; ++hash_idx) {
217            const uint32_t curr_hash_value = GetHashValue(hash_idx);
218            if (curr_hash_value == hash_value) {
219              lldb::offset_t hash_data_offset = GetHashDataOffset(hash_idx);
220              while (hash_data_offset != UINT32_MAX) {
221                const lldb::offset_t prev_hash_data_offset = hash_data_offset;
222                Result hash_result =
223                    GetHashDataForName(name, &hash_data_offset, pair);
224                // Check the result of getting our hash data
225                switch (hash_result) {
226                case eResultKeyMatch:
227                  return true;
228
229                case eResultKeyMismatch:
230                  if (prev_hash_data_offset == hash_data_offset)
231                    return false;
232                  break;
233
234                case eResultEndOfHashData:
235                  // The last HashData for this key has been reached, stop
236                  // searching
237                  return false;
238                case eResultError:
239                  // Status parsing the hash data, abort
240                  return false;
241                }
242              }
243            }
244            if ((curr_hash_value % bucket_count) != bucket_idx)
245              break;
246          }
247        }
248      }
249      return false;
250    }
251
252    // This method must be implemented in any subclasses. The KeyType is user
253    // specified and must somehow result in a string value. For example, the
254    // KeyType might be a string offset in a string table and subclasses can
255    // store their string table as a member of the subclass and return a valie
256    // "const char *" given a "key". The value could also be a C string
257    // pointer, in which case just returning "key" will suffice.
258    virtual const char *GetStringForKeyType(KeyType key) const = 0;
259
260    virtual bool ReadHashData(uint32_t hash_data_offset,
261                              HashData &hash_data) const = 0;
262
263    // This method must be implemented in any subclasses and it must try to
264    // read one "Pair" at the offset pointed to by the "hash_data_offset_ptr"
265    // parameter. This offset should be updated as bytes are consumed and a
266    // value "Result" enum should be returned. If the "name" matches the full
267    // name for the "pair.key" (which must be filled in by this call), then the
268    // HashData in the pair ("pair.value") should be extracted and filled in
269    // and "eResultKeyMatch" should be returned. If "name" doesn't match this
270    // string for the key, then "eResultKeyMismatch" should be returned and all
271    // data for the current HashData must be consumed or skipped and the
272    // "hash_data_offset_ptr" offset needs to be updated to point to the next
273    // HashData. If the end of the HashData objects for a given hash value have
274    // been reached, then "eResultEndOfHashData" should be returned. If
275    // anything else goes wrong during parsing, return "eResultError" and the
276    // corresponding "Find()" function will be canceled and return false.
277    virtual Result GetHashDataForName(llvm::StringRef name,
278                                      lldb::offset_t *hash_data_offset_ptr,
279                                      Pair &pair) const = 0;
280
281    const HeaderType &GetHeader() { return m_header; }
282
283    void ForEach(
284        std::function<bool(const HashData &hash_data)> const &callback) const {
285      const size_t num_hash_offsets = m_header.hashes_count;
286      for (size_t i = 0; i < num_hash_offsets; ++i) {
287        uint32_t hash_data_offset = GetHashDataOffset(i);
288        if (hash_data_offset != UINT32_MAX) {
289          HashData hash_data;
290          if (ReadHashData(hash_data_offset, hash_data)) {
291            // If the callback returns false, then we are done and should stop
292            if (callback(hash_data) == false)
293              return;
294          }
295        }
296      }
297    }
298
299  protected:
300    // Implementation agnostic information
301    HeaderType m_header;
302    const uint32_t *m_hash_indexes;
303    const uint32_t *m_hash_values;
304    const uint32_t *m_hash_offsets;
305  };
306};
307
308#endif // LLDB_CORE_MAPPEDHASH_H
309