1//===-- MappedHash.h --------------------------------------------*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9#ifndef LLDB_CORE_MAPPEDHASH_H 10#define LLDB_CORE_MAPPEDHASH_H 11 12#include <cassert> 13#include <cstdint> 14 15#include <algorithm> 16#include <functional> 17#include <map> 18#include <vector> 19 20#include "lldb/Utility/DataExtractor.h" 21#include "lldb/Utility/Stream.h" 22#include "llvm/Support/DJB.h" 23 24class MappedHash { 25public: 26 enum HashFunctionType { 27 eHashFunctionDJB = 0u // Daniel J Bernstein hash function that is also used 28 // by the ELF GNU_HASH sections 29 }; 30 31 static uint32_t HashString(uint32_t hash_function, llvm::StringRef s) { 32 switch (hash_function) { 33 case MappedHash::eHashFunctionDJB: 34 return llvm::djbHash(s); 35 36 default: 37 break; 38 } 39 llvm_unreachable("Invalid hash function index"); 40 } 41 42 static const uint32_t HASH_MAGIC = 0x48415348u; 43 static const uint32_t HASH_CIGAM = 0x48534148u; 44 45 template <typename T> struct Header { 46 typedef T HeaderData; 47 48 uint32_t 49 magic; // HASH_MAGIC or HASH_CIGAM magic value to allow endian detection 50 uint16_t version = 1; // Version number 51 uint16_t hash_function = 52 eHashFunctionDJB; // The hash function enumeration that was used 53 uint32_t bucket_count = 0; // The number of buckets in this hash table 54 uint32_t hashes_count = 0; // The total number of unique hash values and 55 // hash data offsets in this table 56 uint32_t header_data_len; // The size in bytes of the "header_data" template 57 // member below 58 HeaderData header_data; // 59 60 Header() : magic(HASH_MAGIC), header_data_len(sizeof(T)), header_data() {} 61 62 virtual ~Header() = default; 63 64 size_t GetByteSize() const { 65 return sizeof(magic) + sizeof(version) + sizeof(hash_function) + 66 sizeof(bucket_count) + sizeof(hashes_count) + 67 sizeof(header_data_len) + header_data_len; 68 } 69 70 virtual size_t GetByteSize(const HeaderData &header_data) = 0; 71 72 void SetHeaderDataByteSize(uint32_t header_data_byte_size) { 73 header_data_len = header_data_byte_size; 74 } 75 76 void Dump(lldb_private::Stream &s) { 77 s.Printf("header.magic = 0x%8.8x\n", magic); 78 s.Printf("header.version = 0x%4.4x\n", version); 79 s.Printf("header.hash_function = 0x%4.4x\n", hash_function); 80 s.Printf("header.bucket_count = 0x%8.8x %u\n", bucket_count, 81 bucket_count); 82 s.Printf("header.hashes_count = 0x%8.8x %u\n", hashes_count, 83 hashes_count); 84 s.Printf("header.header_data_len = 0x%8.8x %u\n", header_data_len, 85 header_data_len); 86 } 87 88 virtual lldb::offset_t Read(lldb_private::DataExtractor &data, 89 lldb::offset_t offset) { 90 if (data.ValidOffsetForDataOfSize( 91 offset, sizeof(magic) + sizeof(version) + sizeof(hash_function) + 92 sizeof(bucket_count) + sizeof(hashes_count) + 93 sizeof(header_data_len))) { 94 magic = data.GetU32(&offset); 95 if (magic != HASH_MAGIC) { 96 if (magic == HASH_CIGAM) { 97 switch (data.GetByteOrder()) { 98 case lldb::eByteOrderBig: 99 data.SetByteOrder(lldb::eByteOrderLittle); 100 break; 101 case lldb::eByteOrderLittle: 102 data.SetByteOrder(lldb::eByteOrderBig); 103 break; 104 default: 105 return LLDB_INVALID_OFFSET; 106 } 107 } else { 108 // Magic bytes didn't match 109 version = 0; 110 return LLDB_INVALID_OFFSET; 111 } 112 } 113 114 version = data.GetU16(&offset); 115 if (version != 1) { 116 // Unsupported version 117 return LLDB_INVALID_OFFSET; 118 } 119 hash_function = data.GetU16(&offset); 120 if (hash_function == 4) 121 hash_function = 0; // Deal with pre-release version of this table... 122 bucket_count = data.GetU32(&offset); 123 hashes_count = data.GetU32(&offset); 124 header_data_len = data.GetU32(&offset); 125 return offset; 126 } 127 return LLDB_INVALID_OFFSET; 128 } 129 // 130 // // Returns a buffer that contains a serialized version of this 131 // table 132 // // that must be freed with free(). 133 // virtual void * 134 // Write (int fd); 135 }; 136 137 // A class for reading and using a saved hash table from a block of data 138 // in memory 139 template <typename __KeyType, class __HeaderType, class __HashData> 140 class MemoryTable { 141 public: 142 typedef __HeaderType HeaderType; 143 typedef __KeyType KeyType; 144 typedef __HashData HashData; 145 146 enum Result { 147 eResultKeyMatch = 0u, // The entry was found, key matched and "pair" was 148 // filled in successfully 149 eResultKeyMismatch = 150 1u, // Bucket hash data collision, but key didn't match 151 eResultEndOfHashData = 2u, // The chain of items for this hash data in 152 // this bucket is terminated, search no more 153 eResultError = 3u // Status parsing the hash data, abort 154 }; 155 156 struct Pair { 157 KeyType key; 158 HashData value; 159 }; 160 161 MemoryTable(lldb_private::DataExtractor &data) 162 : m_header(), m_hash_indexes(nullptr), m_hash_values(nullptr), 163 m_hash_offsets(nullptr) { 164 lldb::offset_t offset = m_header.Read(data, 0); 165 if (offset != LLDB_INVALID_OFFSET && IsValid()) { 166 m_hash_indexes = (const uint32_t *)data.GetData( 167 &offset, m_header.bucket_count * sizeof(uint32_t)); 168 m_hash_values = (const uint32_t *)data.GetData( 169 &offset, m_header.hashes_count * sizeof(uint32_t)); 170 m_hash_offsets = (const uint32_t *)data.GetData( 171 &offset, m_header.hashes_count * sizeof(uint32_t)); 172 } 173 } 174 175 virtual ~MemoryTable() = default; 176 177 bool IsValid() const { 178 return m_header.version == 1 && 179 m_header.hash_function == eHashFunctionDJB && 180 m_header.bucket_count > 0; 181 } 182 183 uint32_t GetHashIndex(uint32_t bucket_idx) const { 184 uint32_t result = UINT32_MAX; 185 if (m_hash_indexes && bucket_idx < m_header.bucket_count) 186 memcpy(&result, m_hash_indexes + bucket_idx, sizeof(uint32_t)); 187 return result; 188 } 189 190 uint32_t GetHashValue(uint32_t hash_idx) const { 191 uint32_t result = UINT32_MAX; 192 if (m_hash_values && hash_idx < m_header.hashes_count) 193 memcpy(&result, m_hash_values + hash_idx, sizeof(uint32_t)); 194 return result; 195 } 196 197 uint32_t GetHashDataOffset(uint32_t hash_idx) const { 198 uint32_t result = UINT32_MAX; 199 if (m_hash_offsets && hash_idx < m_header.hashes_count) 200 memcpy(&result, m_hash_offsets + hash_idx, sizeof(uint32_t)); 201 return result; 202 } 203 204 bool Find(llvm::StringRef name, Pair &pair) const { 205 if (name.empty()) 206 return false; 207 208 if (IsValid()) { 209 const uint32_t bucket_count = m_header.bucket_count; 210 const uint32_t hash_count = m_header.hashes_count; 211 const uint32_t hash_value = 212 MappedHash::HashString(m_header.hash_function, name); 213 const uint32_t bucket_idx = hash_value % bucket_count; 214 uint32_t hash_idx = GetHashIndex(bucket_idx); 215 if (hash_idx < hash_count) { 216 for (; hash_idx < hash_count; ++hash_idx) { 217 const uint32_t curr_hash_value = GetHashValue(hash_idx); 218 if (curr_hash_value == hash_value) { 219 lldb::offset_t hash_data_offset = GetHashDataOffset(hash_idx); 220 while (hash_data_offset != UINT32_MAX) { 221 const lldb::offset_t prev_hash_data_offset = hash_data_offset; 222 Result hash_result = 223 GetHashDataForName(name, &hash_data_offset, pair); 224 // Check the result of getting our hash data 225 switch (hash_result) { 226 case eResultKeyMatch: 227 return true; 228 229 case eResultKeyMismatch: 230 if (prev_hash_data_offset == hash_data_offset) 231 return false; 232 break; 233 234 case eResultEndOfHashData: 235 // The last HashData for this key has been reached, stop 236 // searching 237 return false; 238 case eResultError: 239 // Status parsing the hash data, abort 240 return false; 241 } 242 } 243 } 244 if ((curr_hash_value % bucket_count) != bucket_idx) 245 break; 246 } 247 } 248 } 249 return false; 250 } 251 252 // This method must be implemented in any subclasses. The KeyType is user 253 // specified and must somehow result in a string value. For example, the 254 // KeyType might be a string offset in a string table and subclasses can 255 // store their string table as a member of the subclass and return a valie 256 // "const char *" given a "key". The value could also be a C string 257 // pointer, in which case just returning "key" will suffice. 258 virtual const char *GetStringForKeyType(KeyType key) const = 0; 259 260 virtual bool ReadHashData(uint32_t hash_data_offset, 261 HashData &hash_data) const = 0; 262 263 // This method must be implemented in any subclasses and it must try to 264 // read one "Pair" at the offset pointed to by the "hash_data_offset_ptr" 265 // parameter. This offset should be updated as bytes are consumed and a 266 // value "Result" enum should be returned. If the "name" matches the full 267 // name for the "pair.key" (which must be filled in by this call), then the 268 // HashData in the pair ("pair.value") should be extracted and filled in 269 // and "eResultKeyMatch" should be returned. If "name" doesn't match this 270 // string for the key, then "eResultKeyMismatch" should be returned and all 271 // data for the current HashData must be consumed or skipped and the 272 // "hash_data_offset_ptr" offset needs to be updated to point to the next 273 // HashData. If the end of the HashData objects for a given hash value have 274 // been reached, then "eResultEndOfHashData" should be returned. If 275 // anything else goes wrong during parsing, return "eResultError" and the 276 // corresponding "Find()" function will be canceled and return false. 277 virtual Result GetHashDataForName(llvm::StringRef name, 278 lldb::offset_t *hash_data_offset_ptr, 279 Pair &pair) const = 0; 280 281 const HeaderType &GetHeader() { return m_header; } 282 283 void ForEach( 284 std::function<bool(const HashData &hash_data)> const &callback) const { 285 const size_t num_hash_offsets = m_header.hashes_count; 286 for (size_t i = 0; i < num_hash_offsets; ++i) { 287 uint32_t hash_data_offset = GetHashDataOffset(i); 288 if (hash_data_offset != UINT32_MAX) { 289 HashData hash_data; 290 if (ReadHashData(hash_data_offset, hash_data)) { 291 // If the callback returns false, then we are done and should stop 292 if (callback(hash_data) == false) 293 return; 294 } 295 } 296 } 297 } 298 299 protected: 300 // Implementation agnostic information 301 HeaderType m_header; 302 const uint32_t *m_hash_indexes; 303 const uint32_t *m_hash_values; 304 const uint32_t *m_hash_offsets; 305 }; 306}; 307 308#endif // LLDB_CORE_MAPPEDHASH_H 309