177943Sdfr//===-- MappedHash.h --------------------------------------------*- C++ -*-===// 277943Sdfr// 377943Sdfr// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 477943Sdfr// See https://llvm.org/LICENSE.txt for license information. 577943Sdfr// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 677943Sdfr// 777943Sdfr//===----------------------------------------------------------------------===// 877943Sdfr 977943Sdfr#ifndef liblldb_MappedHash_h_ 1077943Sdfr#define liblldb_MappedHash_h_ 1177943Sdfr 1277943Sdfr#include <assert.h> 1377943Sdfr#include <stdint.h> 1477943Sdfr 1577943Sdfr#include <algorithm> 1677943Sdfr#include <functional> 1777943Sdfr#include <map> 1877943Sdfr#include <vector> 1977943Sdfr 2077943Sdfr#include "lldb/Utility/DataExtractor.h" 2177943Sdfr#include "lldb/Utility/Stream.h" 2277943Sdfr#include "llvm/Support/DJB.h" 2377943Sdfr 2477943Sdfrclass MappedHash { 2577943Sdfrpublic: 2677943Sdfr enum HashFunctionType { 27113038Sobrien eHashFunctionDJB = 0u // Daniel J Bernstein hash function that is also used 28113038Sobrien // by the ELF GNU_HASH sections 2978327Sobrien }; 3077943Sdfr 31271762Semaste static uint32_t HashString(uint32_t hash_function, llvm::StringRef s) { 3277943Sdfr switch (hash_function) { 33107723Smarcel case MappedHash::eHashFunctionDJB: 3477943Sdfr return llvm::djbHash(s); 35107723Smarcel 36107723Smarcel default: 37107723Smarcel break; 38322930Simp } 39324557Simp llvm_unreachable("Invalid hash function index"); 40322930Simp } 41322930Simp 42322930Simp static const uint32_t HASH_MAGIC = 0x48415348u; 43322930Simp static const uint32_t HASH_CIGAM = 0x48534148u; 44322930Simp 45322930Simp template <typename T> struct Header { 46324557Simp typedef T HeaderData; 47324557Simp 48324557Simp uint32_t 49324557Simp magic; // HASH_MAGIC or HASH_CIGAM magic value to allow endian detection 50324557Simp uint16_t version; // Version number 51324557Simp uint16_t hash_function; // The hash function enumeration that was used 52324557Simp uint32_t bucket_count; // The number of buckets in this hash table 53107723Smarcel uint32_t hashes_count; // The total number of unique hash values and hash 54107723Smarcel // data offsets in this table 55107723Smarcel uint32_t header_data_len; // The size in bytes of the "header_data" template 56107723Smarcel // member below 57294769Simp HeaderData header_data; // 58107723Smarcel 59107723Smarcel Header() 60107723Smarcel : magic(HASH_MAGIC), version(1), hash_function(eHashFunctionDJB), 61107723Smarcel bucket_count(0), hashes_count(0), header_data_len(sizeof(T)), 62107723Smarcel header_data() {} 63107723Smarcel 64107723Smarcel virtual ~Header() = default; 65107723Smarcel 66294769Simp size_t GetByteSize() const { 67107723Smarcel return sizeof(magic) + sizeof(version) + sizeof(hash_function) + 68107723Smarcel sizeof(bucket_count) + sizeof(hashes_count) + 69107723Smarcel sizeof(header_data_len) + header_data_len; 70107723Smarcel } 71322931Simp 72107723Smarcel virtual size_t GetByteSize(const HeaderData &header_data) = 0; 7377943Sdfr 74107723Smarcel void SetHeaderDataByteSize(uint32_t header_data_byte_size) { 75271762Semaste header_data_len = header_data_byte_size; 76271762Semaste } 77271762Semaste 78107723Smarcel void Dump(lldb_private::Stream &s) { 79107723Smarcel s.Printf("header.magic = 0x%8.8x\n", magic); 80107723Smarcel s.Printf("header.version = 0x%4.4x\n", version); 81107723Smarcel s.Printf("header.hash_function = 0x%4.4x\n", hash_function); 82107723Smarcel s.Printf("header.bucket_count = 0x%8.8x %u\n", bucket_count, 8377943Sdfr bucket_count); 8477943Sdfr s.Printf("header.hashes_count = 0x%8.8x %u\n", hashes_count, 8577943Sdfr hashes_count); 8677943Sdfr s.Printf("header.header_data_len = 0x%8.8x %u\n", header_data_len, 87107723Smarcel header_data_len); 88271762Semaste } 89271762Semaste 90271762Semaste virtual lldb::offset_t Read(lldb_private::DataExtractor &data, 91271762Semaste lldb::offset_t offset) { 92271762Semaste if (data.ValidOffsetForDataOfSize( 93271762Semaste offset, sizeof(magic) + sizeof(version) + sizeof(hash_function) + 94298230Sallanjude sizeof(bucket_count) + sizeof(hashes_count) + 95107723Smarcel sizeof(header_data_len))) { 96107723Smarcel magic = data.GetU32(&offset); 97346482Skevans if (magic != HASH_MAGIC) { 98346482Skevans if (magic == HASH_CIGAM) { 99107723Smarcel switch (data.GetByteOrder()) { 100346482Skevans case lldb::eByteOrderBig: 101107723Smarcel data.SetByteOrder(lldb::eByteOrderLittle); 102163929Smarcel break; 103107723Smarcel case lldb::eByteOrderLittle: 104324557Simp data.SetByteOrder(lldb::eByteOrderBig); 105107723Smarcel break; 106107723Smarcel default: 107107723Smarcel return LLDB_INVALID_OFFSET; 108324557Simp } 109107723Smarcel } else { 110107723Smarcel // Magic bytes didn't match 111107723Smarcel version = 0; 112107723Smarcel return LLDB_INVALID_OFFSET; 113107723Smarcel } 114107723Smarcel } 115107723Smarcel 116107723Smarcel version = data.GetU16(&offset); 117107723Smarcel if (version != 1) { 118107723Smarcel // Unsupported version 119107723Smarcel return LLDB_INVALID_OFFSET; 120107723Smarcel } 121107723Smarcel hash_function = data.GetU16(&offset); 122111692Smarcel if (hash_function == 4) 123107723Smarcel hash_function = 0; // Deal with pre-release version of this table... 124107723Smarcel bucket_count = data.GetU32(&offset); 125322930Simp hashes_count = data.GetU32(&offset); 126107723Smarcel header_data_len = data.GetU32(&offset); 127107723Smarcel return offset; 128107723Smarcel } 129107723Smarcel return LLDB_INVALID_OFFSET; 130107723Smarcel } 131107723Smarcel // 132107723Smarcel // // Returns a buffer that contains a serialized version of this 133107723Smarcel // table 134107723Smarcel // // that must be freed with free(). 135107723Smarcel // virtual void * 136107723Smarcel // Write (int fd); 137107723Smarcel }; 138107723Smarcel 139243978Srpaulo // A class for reading and using a saved hash table from a block of data 140107723Smarcel // in memory 141107723Smarcel template <typename __KeyType, class __HeaderType, class __HashData> 142107723Smarcel class MemoryTable { 143107723Smarcel public: 144107723Smarcel typedef __HeaderType HeaderType; 145107723Smarcel typedef __KeyType KeyType; 146107723Smarcel typedef __HashData HashData; 147107723Smarcel 148107723Smarcel enum Result { 149107723Smarcel eResultKeyMatch = 0u, // The entry was found, key matched and "pair" was 150107723Smarcel // filled in successfully 151107723Smarcel eResultKeyMismatch = 152107723Smarcel 1u, // Bucket hash data collision, but key didn't match 153107723Smarcel eResultEndOfHashData = 2u, // The chain of items for this hash data in 154107723Smarcel // this bucket is terminated, search no more 155107723Smarcel eResultError = 3u // Status parsing the hash data, abort 156107723Smarcel }; 157107723Smarcel 158107723Smarcel struct Pair { 159107723Smarcel KeyType key; 160107723Smarcel HashData value; 161107723Smarcel }; 162107723Smarcel 163107723Smarcel MemoryTable(lldb_private::DataExtractor &data) 164107723Smarcel : m_header(), m_hash_indexes(nullptr), m_hash_values(nullptr), 165107723Smarcel m_hash_offsets(nullptr) { 166107723Smarcel lldb::offset_t offset = m_header.Read(data, 0); 167107723Smarcel if (offset != LLDB_INVALID_OFFSET && IsValid()) { 168107723Smarcel m_hash_indexes = (const uint32_t *)data.GetData( 169107723Smarcel &offset, m_header.bucket_count * sizeof(uint32_t)); 170107723Smarcel m_hash_values = (const uint32_t *)data.GetData( 171107723Smarcel &offset, m_header.hashes_count * sizeof(uint32_t)); 172107723Smarcel m_hash_offsets = (const uint32_t *)data.GetData( 173293796Ssmh &offset, m_header.hashes_count * sizeof(uint32_t)); 174107723Smarcel } 175107723Smarcel } 176107723Smarcel 177107723Smarcel virtual ~MemoryTable() = default; 178107723Smarcel 179107723Smarcel bool IsValid() const { 180107723Smarcel return m_header.version == 1 && 181107723Smarcel m_header.hash_function == eHashFunctionDJB && 182107723Smarcel m_header.bucket_count > 0; 183107723Smarcel } 184107723Smarcel 185107723Smarcel uint32_t GetHashIndex(uint32_t bucket_idx) const { 186107723Smarcel uint32_t result = UINT32_MAX; 187107723Smarcel if (m_hash_indexes && bucket_idx < m_header.bucket_count) 188324557Simp memcpy(&result, m_hash_indexes + bucket_idx, sizeof(uint32_t)); 189322931Simp return result; 19077943Sdfr } 191 192 uint32_t GetHashValue(uint32_t hash_idx) const { 193 uint32_t result = UINT32_MAX; 194 if (m_hash_values && hash_idx < m_header.hashes_count) 195 memcpy(&result, m_hash_values + hash_idx, sizeof(uint32_t)); 196 return result; 197 } 198 199 uint32_t GetHashDataOffset(uint32_t hash_idx) const { 200 uint32_t result = UINT32_MAX; 201 if (m_hash_offsets && hash_idx < m_header.hashes_count) 202 memcpy(&result, m_hash_offsets + hash_idx, sizeof(uint32_t)); 203 return result; 204 } 205 206 bool Find(llvm::StringRef name, Pair &pair) const { 207 if (name.empty()) 208 return false; 209 210 if (IsValid()) { 211 const uint32_t bucket_count = m_header.bucket_count; 212 const uint32_t hash_count = m_header.hashes_count; 213 const uint32_t hash_value = 214 MappedHash::HashString(m_header.hash_function, name); 215 const uint32_t bucket_idx = hash_value % bucket_count; 216 uint32_t hash_idx = GetHashIndex(bucket_idx); 217 if (hash_idx < hash_count) { 218 for (; hash_idx < hash_count; ++hash_idx) { 219 const uint32_t curr_hash_value = GetHashValue(hash_idx); 220 if (curr_hash_value == hash_value) { 221 lldb::offset_t hash_data_offset = GetHashDataOffset(hash_idx); 222 while (hash_data_offset != UINT32_MAX) { 223 const lldb::offset_t prev_hash_data_offset = hash_data_offset; 224 Result hash_result = 225 GetHashDataForName(name, &hash_data_offset, pair); 226 // Check the result of getting our hash data 227 switch (hash_result) { 228 case eResultKeyMatch: 229 return true; 230 231 case eResultKeyMismatch: 232 if (prev_hash_data_offset == hash_data_offset) 233 return false; 234 break; 235 236 case eResultEndOfHashData: 237 // The last HashData for this key has been reached, stop 238 // searching 239 return false; 240 case eResultError: 241 // Status parsing the hash data, abort 242 return false; 243 } 244 } 245 } 246 if ((curr_hash_value % bucket_count) != bucket_idx) 247 break; 248 } 249 } 250 } 251 return false; 252 } 253 254 // This method must be implemented in any subclasses. The KeyType is user 255 // specified and must somehow result in a string value. For example, the 256 // KeyType might be a string offset in a string table and subclasses can 257 // store their string table as a member of the subclass and return a valie 258 // "const char *" given a "key". The value could also be a C string 259 // pointer, in which case just returning "key" will suffice. 260 virtual const char *GetStringForKeyType(KeyType key) const = 0; 261 262 virtual bool ReadHashData(uint32_t hash_data_offset, 263 HashData &hash_data) const = 0; 264 265 // This method must be implemented in any subclasses and it must try to 266 // read one "Pair" at the offset pointed to by the "hash_data_offset_ptr" 267 // parameter. This offset should be updated as bytes are consumed and a 268 // value "Result" enum should be returned. If the "name" matches the full 269 // name for the "pair.key" (which must be filled in by this call), then the 270 // HashData in the pair ("pair.value") should be extracted and filled in 271 // and "eResultKeyMatch" should be returned. If "name" doesn't match this 272 // string for the key, then "eResultKeyMismatch" should be returned and all 273 // data for the current HashData must be consumed or skipped and the 274 // "hash_data_offset_ptr" offset needs to be updated to point to the next 275 // HashData. If the end of the HashData objects for a given hash value have 276 // been reached, then "eResultEndOfHashData" should be returned. If 277 // anything else goes wrong during parsing, return "eResultError" and the 278 // corresponding "Find()" function will be canceled and return false. 279 virtual Result GetHashDataForName(llvm::StringRef name, 280 lldb::offset_t *hash_data_offset_ptr, 281 Pair &pair) const = 0; 282 283 const HeaderType &GetHeader() { return m_header; } 284 285 void ForEach( 286 std::function<bool(const HashData &hash_data)> const &callback) const { 287 const size_t num_hash_offsets = m_header.hashes_count; 288 for (size_t i = 0; i < num_hash_offsets; ++i) { 289 uint32_t hash_data_offset = GetHashDataOffset(i); 290 if (hash_data_offset != UINT32_MAX) { 291 HashData hash_data; 292 if (ReadHashData(hash_data_offset, hash_data)) { 293 // If the callback returns false, then we are done and should stop 294 if (callback(hash_data) == false) 295 return; 296 } 297 } 298 } 299 } 300 301 protected: 302 // Implementation agnostic information 303 HeaderType m_header; 304 const uint32_t *m_hash_indexes; 305 const uint32_t *m_hash_values; 306 const uint32_t *m_hash_offsets; 307 }; 308}; 309 310#endif // liblldb_MappedHash_h_ 311