177943Sdfr//===-- MappedHash.h --------------------------------------------*- C++ -*-===//
277943Sdfr//
377943Sdfr// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
477943Sdfr// See https://llvm.org/LICENSE.txt for license information.
577943Sdfr// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
677943Sdfr//
777943Sdfr//===----------------------------------------------------------------------===//
877943Sdfr
977943Sdfr#ifndef liblldb_MappedHash_h_
1077943Sdfr#define liblldb_MappedHash_h_
1177943Sdfr
1277943Sdfr#include <assert.h>
1377943Sdfr#include <stdint.h>
1477943Sdfr
1577943Sdfr#include <algorithm>
1677943Sdfr#include <functional>
1777943Sdfr#include <map>
1877943Sdfr#include <vector>
1977943Sdfr
2077943Sdfr#include "lldb/Utility/DataExtractor.h"
2177943Sdfr#include "lldb/Utility/Stream.h"
2277943Sdfr#include "llvm/Support/DJB.h"
2377943Sdfr
2477943Sdfrclass MappedHash {
2577943Sdfrpublic:
2677943Sdfr  enum HashFunctionType {
27113038Sobrien    eHashFunctionDJB = 0u // Daniel J Bernstein hash function that is also used
28113038Sobrien                          // by the ELF GNU_HASH sections
2978327Sobrien  };
3077943Sdfr
31271762Semaste  static uint32_t HashString(uint32_t hash_function, llvm::StringRef s) {
3277943Sdfr    switch (hash_function) {
33107723Smarcel    case MappedHash::eHashFunctionDJB:
3477943Sdfr      return llvm::djbHash(s);
35107723Smarcel
36107723Smarcel    default:
37107723Smarcel      break;
38322930Simp    }
39324557Simp    llvm_unreachable("Invalid hash function index");
40322930Simp  }
41322930Simp
42322930Simp  static const uint32_t HASH_MAGIC = 0x48415348u;
43322930Simp  static const uint32_t HASH_CIGAM = 0x48534148u;
44322930Simp
45322930Simp  template <typename T> struct Header {
46324557Simp    typedef T HeaderData;
47324557Simp
48324557Simp    uint32_t
49324557Simp        magic; // HASH_MAGIC or HASH_CIGAM magic value to allow endian detection
50324557Simp    uint16_t version;         // Version number
51324557Simp    uint16_t hash_function;   // The hash function enumeration that was used
52324557Simp    uint32_t bucket_count;    // The number of buckets in this hash table
53107723Smarcel    uint32_t hashes_count;    // The total number of unique hash values and hash
54107723Smarcel                              // data offsets in this table
55107723Smarcel    uint32_t header_data_len; // The size in bytes of the "header_data" template
56107723Smarcel                              // member below
57294769Simp    HeaderData header_data;   //
58107723Smarcel
59107723Smarcel    Header()
60107723Smarcel        : magic(HASH_MAGIC), version(1), hash_function(eHashFunctionDJB),
61107723Smarcel          bucket_count(0), hashes_count(0), header_data_len(sizeof(T)),
62107723Smarcel          header_data() {}
63107723Smarcel
64107723Smarcel    virtual ~Header() = default;
65107723Smarcel
66294769Simp    size_t GetByteSize() const {
67107723Smarcel      return sizeof(magic) + sizeof(version) + sizeof(hash_function) +
68107723Smarcel             sizeof(bucket_count) + sizeof(hashes_count) +
69107723Smarcel             sizeof(header_data_len) + header_data_len;
70107723Smarcel    }
71322931Simp
72107723Smarcel    virtual size_t GetByteSize(const HeaderData &header_data) = 0;
7377943Sdfr
74107723Smarcel    void SetHeaderDataByteSize(uint32_t header_data_byte_size) {
75271762Semaste      header_data_len = header_data_byte_size;
76271762Semaste    }
77271762Semaste
78107723Smarcel    void Dump(lldb_private::Stream &s) {
79107723Smarcel      s.Printf("header.magic              = 0x%8.8x\n", magic);
80107723Smarcel      s.Printf("header.version            = 0x%4.4x\n", version);
81107723Smarcel      s.Printf("header.hash_function      = 0x%4.4x\n", hash_function);
82107723Smarcel      s.Printf("header.bucket_count       = 0x%8.8x %u\n", bucket_count,
8377943Sdfr               bucket_count);
8477943Sdfr      s.Printf("header.hashes_count       = 0x%8.8x %u\n", hashes_count,
8577943Sdfr               hashes_count);
8677943Sdfr      s.Printf("header.header_data_len    = 0x%8.8x %u\n", header_data_len,
87107723Smarcel               header_data_len);
88271762Semaste    }
89271762Semaste
90271762Semaste    virtual lldb::offset_t Read(lldb_private::DataExtractor &data,
91271762Semaste                                lldb::offset_t offset) {
92271762Semaste      if (data.ValidOffsetForDataOfSize(
93271762Semaste              offset, sizeof(magic) + sizeof(version) + sizeof(hash_function) +
94298230Sallanjude                          sizeof(bucket_count) + sizeof(hashes_count) +
95107723Smarcel                          sizeof(header_data_len))) {
96107723Smarcel        magic = data.GetU32(&offset);
97346482Skevans        if (magic != HASH_MAGIC) {
98346482Skevans          if (magic == HASH_CIGAM) {
99107723Smarcel            switch (data.GetByteOrder()) {
100346482Skevans            case lldb::eByteOrderBig:
101107723Smarcel              data.SetByteOrder(lldb::eByteOrderLittle);
102163929Smarcel              break;
103107723Smarcel            case lldb::eByteOrderLittle:
104324557Simp              data.SetByteOrder(lldb::eByteOrderBig);
105107723Smarcel              break;
106107723Smarcel            default:
107107723Smarcel              return LLDB_INVALID_OFFSET;
108324557Simp            }
109107723Smarcel          } else {
110107723Smarcel            // Magic bytes didn't match
111107723Smarcel            version = 0;
112107723Smarcel            return LLDB_INVALID_OFFSET;
113107723Smarcel          }
114107723Smarcel        }
115107723Smarcel
116107723Smarcel        version = data.GetU16(&offset);
117107723Smarcel        if (version != 1) {
118107723Smarcel          // Unsupported version
119107723Smarcel          return LLDB_INVALID_OFFSET;
120107723Smarcel        }
121107723Smarcel        hash_function = data.GetU16(&offset);
122111692Smarcel        if (hash_function == 4)
123107723Smarcel          hash_function = 0; // Deal with pre-release version of this table...
124107723Smarcel        bucket_count = data.GetU32(&offset);
125322930Simp        hashes_count = data.GetU32(&offset);
126107723Smarcel        header_data_len = data.GetU32(&offset);
127107723Smarcel        return offset;
128107723Smarcel      }
129107723Smarcel      return LLDB_INVALID_OFFSET;
130107723Smarcel    }
131107723Smarcel    //
132107723Smarcel    //        // Returns a buffer that contains a serialized version of this
133107723Smarcel    //        table
134107723Smarcel    //        // that must be freed with free().
135107723Smarcel    //        virtual void *
136107723Smarcel    //        Write (int fd);
137107723Smarcel  };
138107723Smarcel
139243978Srpaulo  // A class for reading and using a saved hash table from a block of data
140107723Smarcel  // in memory
141107723Smarcel  template <typename __KeyType, class __HeaderType, class __HashData>
142107723Smarcel  class MemoryTable {
143107723Smarcel  public:
144107723Smarcel    typedef __HeaderType HeaderType;
145107723Smarcel    typedef __KeyType KeyType;
146107723Smarcel    typedef __HashData HashData;
147107723Smarcel
148107723Smarcel    enum Result {
149107723Smarcel      eResultKeyMatch = 0u, // The entry was found, key matched and "pair" was
150107723Smarcel                            // filled in successfully
151107723Smarcel      eResultKeyMismatch =
152107723Smarcel          1u, // Bucket hash data collision, but key didn't match
153107723Smarcel      eResultEndOfHashData = 2u, // The chain of items for this hash data in
154107723Smarcel                                 // this bucket is terminated, search no more
155107723Smarcel      eResultError = 3u          // Status parsing the hash data, abort
156107723Smarcel    };
157107723Smarcel
158107723Smarcel    struct Pair {
159107723Smarcel      KeyType key;
160107723Smarcel      HashData value;
161107723Smarcel    };
162107723Smarcel
163107723Smarcel    MemoryTable(lldb_private::DataExtractor &data)
164107723Smarcel        : m_header(), m_hash_indexes(nullptr), m_hash_values(nullptr),
165107723Smarcel          m_hash_offsets(nullptr) {
166107723Smarcel      lldb::offset_t offset = m_header.Read(data, 0);
167107723Smarcel      if (offset != LLDB_INVALID_OFFSET && IsValid()) {
168107723Smarcel        m_hash_indexes = (const uint32_t *)data.GetData(
169107723Smarcel            &offset, m_header.bucket_count * sizeof(uint32_t));
170107723Smarcel        m_hash_values = (const uint32_t *)data.GetData(
171107723Smarcel            &offset, m_header.hashes_count * sizeof(uint32_t));
172107723Smarcel        m_hash_offsets = (const uint32_t *)data.GetData(
173293796Ssmh            &offset, m_header.hashes_count * sizeof(uint32_t));
174107723Smarcel      }
175107723Smarcel    }
176107723Smarcel
177107723Smarcel    virtual ~MemoryTable() = default;
178107723Smarcel
179107723Smarcel    bool IsValid() const {
180107723Smarcel      return m_header.version == 1 &&
181107723Smarcel             m_header.hash_function == eHashFunctionDJB &&
182107723Smarcel             m_header.bucket_count > 0;
183107723Smarcel    }
184107723Smarcel
185107723Smarcel    uint32_t GetHashIndex(uint32_t bucket_idx) const {
186107723Smarcel      uint32_t result = UINT32_MAX;
187107723Smarcel      if (m_hash_indexes && bucket_idx < m_header.bucket_count)
188324557Simp        memcpy(&result, m_hash_indexes + bucket_idx, sizeof(uint32_t));
189322931Simp      return result;
19077943Sdfr    }
191
192    uint32_t GetHashValue(uint32_t hash_idx) const {
193      uint32_t result = UINT32_MAX;
194      if (m_hash_values && hash_idx < m_header.hashes_count)
195        memcpy(&result, m_hash_values + hash_idx, sizeof(uint32_t));
196      return result;
197    }
198
199    uint32_t GetHashDataOffset(uint32_t hash_idx) const {
200      uint32_t result = UINT32_MAX;
201      if (m_hash_offsets && hash_idx < m_header.hashes_count)
202        memcpy(&result, m_hash_offsets + hash_idx, sizeof(uint32_t));
203      return result;
204    }
205
206    bool Find(llvm::StringRef name, Pair &pair) const {
207      if (name.empty())
208        return false;
209
210      if (IsValid()) {
211        const uint32_t bucket_count = m_header.bucket_count;
212        const uint32_t hash_count = m_header.hashes_count;
213        const uint32_t hash_value =
214            MappedHash::HashString(m_header.hash_function, name);
215        const uint32_t bucket_idx = hash_value % bucket_count;
216        uint32_t hash_idx = GetHashIndex(bucket_idx);
217        if (hash_idx < hash_count) {
218          for (; hash_idx < hash_count; ++hash_idx) {
219            const uint32_t curr_hash_value = GetHashValue(hash_idx);
220            if (curr_hash_value == hash_value) {
221              lldb::offset_t hash_data_offset = GetHashDataOffset(hash_idx);
222              while (hash_data_offset != UINT32_MAX) {
223                const lldb::offset_t prev_hash_data_offset = hash_data_offset;
224                Result hash_result =
225                    GetHashDataForName(name, &hash_data_offset, pair);
226                // Check the result of getting our hash data
227                switch (hash_result) {
228                case eResultKeyMatch:
229                  return true;
230
231                case eResultKeyMismatch:
232                  if (prev_hash_data_offset == hash_data_offset)
233                    return false;
234                  break;
235
236                case eResultEndOfHashData:
237                  // The last HashData for this key has been reached, stop
238                  // searching
239                  return false;
240                case eResultError:
241                  // Status parsing the hash data, abort
242                  return false;
243                }
244              }
245            }
246            if ((curr_hash_value % bucket_count) != bucket_idx)
247              break;
248          }
249        }
250      }
251      return false;
252    }
253
254    // This method must be implemented in any subclasses. The KeyType is user
255    // specified and must somehow result in a string value. For example, the
256    // KeyType might be a string offset in a string table and subclasses can
257    // store their string table as a member of the subclass and return a valie
258    // "const char *" given a "key". The value could also be a C string
259    // pointer, in which case just returning "key" will suffice.
260    virtual const char *GetStringForKeyType(KeyType key) const = 0;
261
262    virtual bool ReadHashData(uint32_t hash_data_offset,
263                              HashData &hash_data) const = 0;
264
265    // This method must be implemented in any subclasses and it must try to
266    // read one "Pair" at the offset pointed to by the "hash_data_offset_ptr"
267    // parameter. This offset should be updated as bytes are consumed and a
268    // value "Result" enum should be returned. If the "name" matches the full
269    // name for the "pair.key" (which must be filled in by this call), then the
270    // HashData in the pair ("pair.value") should be extracted and filled in
271    // and "eResultKeyMatch" should be returned. If "name" doesn't match this
272    // string for the key, then "eResultKeyMismatch" should be returned and all
273    // data for the current HashData must be consumed or skipped and the
274    // "hash_data_offset_ptr" offset needs to be updated to point to the next
275    // HashData. If the end of the HashData objects for a given hash value have
276    // been reached, then "eResultEndOfHashData" should be returned. If
277    // anything else goes wrong during parsing, return "eResultError" and the
278    // corresponding "Find()" function will be canceled and return false.
279    virtual Result GetHashDataForName(llvm::StringRef name,
280                                      lldb::offset_t *hash_data_offset_ptr,
281                                      Pair &pair) const = 0;
282
283    const HeaderType &GetHeader() { return m_header; }
284
285    void ForEach(
286        std::function<bool(const HashData &hash_data)> const &callback) const {
287      const size_t num_hash_offsets = m_header.hashes_count;
288      for (size_t i = 0; i < num_hash_offsets; ++i) {
289        uint32_t hash_data_offset = GetHashDataOffset(i);
290        if (hash_data_offset != UINT32_MAX) {
291          HashData hash_data;
292          if (ReadHashData(hash_data_offset, hash_data)) {
293            // If the callback returns false, then we are done and should stop
294            if (callback(hash_data) == false)
295              return;
296          }
297        }
298      }
299    }
300
301  protected:
302    // Implementation agnostic information
303    HeaderType m_header;
304    const uint32_t *m_hash_indexes;
305    const uint32_t *m_hash_values;
306    const uint32_t *m_hash_offsets;
307  };
308};
309
310#endif // liblldb_MappedHash_h_
311