StringMap.h revision 193323
1193323Sed//===--- StringMap.h - String Hash table map interface ----------*- C++ -*-===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file defines the StringMap class. 11193323Sed// 12193323Sed//===----------------------------------------------------------------------===// 13193323Sed 14193323Sed#ifndef LLVM_ADT_STRINGMAP_H 15193323Sed#define LLVM_ADT_STRINGMAP_H 16193323Sed 17193323Sed#include "llvm/Support/Allocator.h" 18193323Sed#include <cstring> 19193323Sed#include <string> 20193323Sed 21193323Sednamespace llvm { 22193323Sed template<typename ValueT> 23193323Sed class StringMapConstIterator; 24193323Sed template<typename ValueT> 25193323Sed class StringMapIterator; 26193323Sed template<typename ValueTy> 27193323Sed class StringMapEntry; 28193323Sed 29193323Sed/// StringMapEntryInitializer - This datatype can be partially specialized for 30193323Sed/// various datatypes in a stringmap to allow them to be initialized when an 31193323Sed/// entry is default constructed for the map. 32193323Sedtemplate<typename ValueTy> 33193323Sedclass StringMapEntryInitializer { 34193323Sedpublic: 35193323Sed template <typename InitTy> 36193323Sed static void Initialize(StringMapEntry<ValueTy> &T, InitTy InitVal) { 37193323Sed T.second = InitVal; 38193323Sed } 39193323Sed}; 40193323Sed 41193323Sed 42193323Sed/// StringMapEntryBase - Shared base class of StringMapEntry instances. 43193323Sedclass StringMapEntryBase { 44193323Sed unsigned StrLen; 45193323Sedpublic: 46193323Sed explicit StringMapEntryBase(unsigned Len) : StrLen(Len) {} 47193323Sed 48193323Sed unsigned getKeyLength() const { return StrLen; } 49193323Sed}; 50193323Sed 51193323Sed/// StringMapImpl - This is the base class of StringMap that is shared among 52193323Sed/// all of its instantiations. 53193323Sedclass StringMapImpl { 54193323Sedpublic: 55193323Sed /// ItemBucket - The hash table consists of an array of these. If Item is 56193323Sed /// non-null, this is an extant entry, otherwise, it is a hole. 57193323Sed struct ItemBucket { 58193323Sed /// FullHashValue - This remembers the full hash value of the key for 59193323Sed /// easy scanning. 60193323Sed unsigned FullHashValue; 61193323Sed 62193323Sed /// Item - This is a pointer to the actual item object. 63193323Sed StringMapEntryBase *Item; 64193323Sed }; 65193323Sed 66193323Sedprotected: 67193323Sed ItemBucket *TheTable; 68193323Sed unsigned NumBuckets; 69193323Sed unsigned NumItems; 70193323Sed unsigned NumTombstones; 71193323Sed unsigned ItemSize; 72193323Sedprotected: 73193323Sed explicit StringMapImpl(unsigned itemSize) : ItemSize(itemSize) { 74193323Sed // Initialize the map with zero buckets to allocation. 75193323Sed TheTable = 0; 76193323Sed NumBuckets = 0; 77193323Sed NumItems = 0; 78193323Sed NumTombstones = 0; 79193323Sed } 80193323Sed StringMapImpl(unsigned InitSize, unsigned ItemSize); 81193323Sed void RehashTable(); 82193323Sed 83193323Sed /// ShouldRehash - Return true if the table should be rehashed after a new 84193323Sed /// element was recently inserted. 85193323Sed bool ShouldRehash() const { 86193323Sed // If the hash table is now more than 3/4 full, or if fewer than 1/8 of 87193323Sed // the buckets are empty (meaning that many are filled with tombstones), 88193323Sed // grow the table. 89193323Sed return NumItems*4 > NumBuckets*3 || 90193323Sed NumBuckets-(NumItems+NumTombstones) < NumBuckets/8; 91193323Sed } 92193323Sed 93193323Sed /// LookupBucketFor - Look up the bucket that the specified string should end 94193323Sed /// up in. If it already exists as a key in the map, the Item pointer for the 95193323Sed /// specified bucket will be non-null. Otherwise, it will be null. In either 96193323Sed /// case, the FullHashValue field of the bucket will be set to the hash value 97193323Sed /// of the string. 98193323Sed unsigned LookupBucketFor(const char *KeyStart, const char *KeyEnd); 99193323Sed 100193323Sed /// FindKey - Look up the bucket that contains the specified key. If it exists 101193323Sed /// in the map, return the bucket number of the key. Otherwise return -1. 102193323Sed /// This does not modify the map. 103193323Sed int FindKey(const char *KeyStart, const char *KeyEnd) const; 104193323Sed 105193323Sed /// RemoveKey - Remove the specified StringMapEntry from the table, but do not 106193323Sed /// delete it. This aborts if the value isn't in the table. 107193323Sed void RemoveKey(StringMapEntryBase *V); 108193323Sed 109193323Sed /// RemoveKey - Remove the StringMapEntry for the specified key from the 110193323Sed /// table, returning it. If the key is not in the table, this returns null. 111193323Sed StringMapEntryBase *RemoveKey(const char *KeyStart, const char *KeyEnd); 112193323Sedprivate: 113193323Sed void init(unsigned Size); 114193323Sedpublic: 115193323Sed static StringMapEntryBase *getTombstoneVal() { 116193323Sed return (StringMapEntryBase*)-1; 117193323Sed } 118193323Sed 119193323Sed unsigned getNumBuckets() const { return NumBuckets; } 120193323Sed unsigned getNumItems() const { return NumItems; } 121193323Sed 122193323Sed bool empty() const { return NumItems == 0; } 123193323Sed unsigned size() const { return NumItems; } 124193323Sed}; 125193323Sed 126193323Sed/// StringMapEntry - This is used to represent one value that is inserted into 127193323Sed/// a StringMap. It contains the Value itself and the key: the string length 128193323Sed/// and data. 129193323Sedtemplate<typename ValueTy> 130193323Sedclass StringMapEntry : public StringMapEntryBase { 131193323Sedpublic: 132193323Sed ValueTy second; 133193323Sed 134193323Sed explicit StringMapEntry(unsigned strLen) 135193323Sed : StringMapEntryBase(strLen), second() {} 136193323Sed StringMapEntry(unsigned strLen, const ValueTy &V) 137193323Sed : StringMapEntryBase(strLen), second(V) {} 138193323Sed 139193323Sed const ValueTy &getValue() const { return second; } 140193323Sed ValueTy &getValue() { return second; } 141193323Sed 142193323Sed void setValue(const ValueTy &V) { second = V; } 143193323Sed 144193323Sed /// getKeyData - Return the start of the string data that is the key for this 145193323Sed /// value. The string data is always stored immediately after the 146193323Sed /// StringMapEntry object. 147193323Sed const char *getKeyData() const {return reinterpret_cast<const char*>(this+1);} 148193323Sed 149193323Sed const char *first() const { return getKeyData(); } 150193323Sed 151193323Sed /// Create - Create a StringMapEntry for the specified key and default 152193323Sed /// construct the value. 153193323Sed template<typename AllocatorTy, typename InitType> 154193323Sed static StringMapEntry *Create(const char *KeyStart, const char *KeyEnd, 155193323Sed AllocatorTy &Allocator, 156193323Sed InitType InitVal) { 157193323Sed unsigned KeyLength = static_cast<unsigned>(KeyEnd-KeyStart); 158193323Sed 159193323Sed // Okay, the item doesn't already exist, and 'Bucket' is the bucket to fill 160193323Sed // in. Allocate a new item with space for the string at the end and a null 161193323Sed // terminator. 162193323Sed 163193323Sed unsigned AllocSize = static_cast<unsigned>(sizeof(StringMapEntry))+ 164193323Sed KeyLength+1; 165193323Sed unsigned Alignment = alignof<StringMapEntry>(); 166193323Sed 167193323Sed StringMapEntry *NewItem = 168193323Sed static_cast<StringMapEntry*>(Allocator.Allocate(AllocSize,Alignment)); 169193323Sed 170193323Sed // Default construct the value. 171193323Sed new (NewItem) StringMapEntry(KeyLength); 172193323Sed 173193323Sed // Copy the string information. 174193323Sed char *StrBuffer = const_cast<char*>(NewItem->getKeyData()); 175193323Sed memcpy(StrBuffer, KeyStart, KeyLength); 176193323Sed StrBuffer[KeyLength] = 0; // Null terminate for convenience of clients. 177193323Sed 178193323Sed // Initialize the value if the client wants to. 179193323Sed StringMapEntryInitializer<ValueTy>::Initialize(*NewItem, InitVal); 180193323Sed return NewItem; 181193323Sed } 182193323Sed 183193323Sed template<typename AllocatorTy> 184193323Sed static StringMapEntry *Create(const char *KeyStart, const char *KeyEnd, 185193323Sed AllocatorTy &Allocator) { 186193323Sed return Create(KeyStart, KeyEnd, Allocator, 0); 187193323Sed } 188193323Sed 189193323Sed 190193323Sed /// Create - Create a StringMapEntry with normal malloc/free. 191193323Sed template<typename InitType> 192193323Sed static StringMapEntry *Create(const char *KeyStart, const char *KeyEnd, 193193323Sed InitType InitVal) { 194193323Sed MallocAllocator A; 195193323Sed return Create(KeyStart, KeyEnd, A, InitVal); 196193323Sed } 197193323Sed 198193323Sed static StringMapEntry *Create(const char *KeyStart, const char *KeyEnd) { 199193323Sed return Create(KeyStart, KeyEnd, ValueTy()); 200193323Sed } 201193323Sed 202193323Sed /// GetStringMapEntryFromValue - Given a value that is known to be embedded 203193323Sed /// into a StringMapEntry, return the StringMapEntry itself. 204193323Sed static StringMapEntry &GetStringMapEntryFromValue(ValueTy &V) { 205193323Sed StringMapEntry *EPtr = 0; 206193323Sed char *Ptr = reinterpret_cast<char*>(&V) - 207193323Sed (reinterpret_cast<char*>(&EPtr->second) - 208193323Sed reinterpret_cast<char*>(EPtr)); 209193323Sed return *reinterpret_cast<StringMapEntry*>(Ptr); 210193323Sed } 211193323Sed static const StringMapEntry &GetStringMapEntryFromValue(const ValueTy &V) { 212193323Sed return GetStringMapEntryFromValue(const_cast<ValueTy&>(V)); 213193323Sed } 214193323Sed 215193323Sed /// Destroy - Destroy this StringMapEntry, releasing memory back to the 216193323Sed /// specified allocator. 217193323Sed template<typename AllocatorTy> 218193323Sed void Destroy(AllocatorTy &Allocator) { 219193323Sed // Free memory referenced by the item. 220193323Sed this->~StringMapEntry(); 221193323Sed Allocator.Deallocate(this); 222193323Sed } 223193323Sed 224193323Sed /// Destroy this object, releasing memory back to the malloc allocator. 225193323Sed void Destroy() { 226193323Sed MallocAllocator A; 227193323Sed Destroy(A); 228193323Sed } 229193323Sed}; 230193323Sed 231193323Sed 232193323Sed/// StringMap - This is an unconventional map that is specialized for handling 233193323Sed/// keys that are "strings", which are basically ranges of bytes. This does some 234193323Sed/// funky memory allocation and hashing things to make it extremely efficient, 235193323Sed/// storing the string data *after* the value in the map. 236193323Sedtemplate<typename ValueTy, typename AllocatorTy = MallocAllocator> 237193323Sedclass StringMap : public StringMapImpl { 238193323Sed AllocatorTy Allocator; 239193323Sed typedef StringMapEntry<ValueTy> MapEntryTy; 240193323Sedpublic: 241193323Sed StringMap() : StringMapImpl(static_cast<unsigned>(sizeof(MapEntryTy))) {} 242193323Sed explicit StringMap(unsigned InitialSize) 243193323Sed : StringMapImpl(InitialSize, static_cast<unsigned>(sizeof(MapEntryTy))) {} 244193323Sed explicit StringMap(const StringMap &RHS) 245193323Sed : StringMapImpl(static_cast<unsigned>(sizeof(MapEntryTy))) { 246193323Sed assert(RHS.empty() && 247193323Sed "Copy ctor from non-empty stringmap not implemented yet!"); 248193323Sed } 249193323Sed void operator=(const StringMap &RHS) { 250193323Sed assert(RHS.empty() && 251193323Sed "assignment from non-empty stringmap not implemented yet!"); 252193323Sed clear(); 253193323Sed } 254193323Sed 255193323Sed 256193323Sed AllocatorTy &getAllocator() { return Allocator; } 257193323Sed const AllocatorTy &getAllocator() const { return Allocator; } 258193323Sed 259193323Sed typedef const char* key_type; 260193323Sed typedef ValueTy mapped_type; 261193323Sed typedef StringMapEntry<ValueTy> value_type; 262193323Sed typedef size_t size_type; 263193323Sed 264193323Sed typedef StringMapConstIterator<ValueTy> const_iterator; 265193323Sed typedef StringMapIterator<ValueTy> iterator; 266193323Sed 267193323Sed iterator begin() { 268193323Sed return iterator(TheTable, NumBuckets == 0); 269193323Sed } 270193323Sed iterator end() { 271193323Sed return iterator(TheTable+NumBuckets, true); 272193323Sed } 273193323Sed const_iterator begin() const { 274193323Sed return const_iterator(TheTable, NumBuckets == 0); 275193323Sed } 276193323Sed const_iterator end() const { 277193323Sed return const_iterator(TheTable+NumBuckets, true); 278193323Sed } 279193323Sed 280193323Sed iterator find(const char *KeyStart, const char *KeyEnd) { 281193323Sed int Bucket = FindKey(KeyStart, KeyEnd); 282193323Sed if (Bucket == -1) return end(); 283193323Sed return iterator(TheTable+Bucket); 284193323Sed } 285193323Sed iterator find(const char *Key) { 286193323Sed return find(Key, Key + strlen(Key)); 287193323Sed } 288193323Sed iterator find(const std::string &Key) { 289193323Sed return find(Key.data(), Key.data() + Key.size()); 290193323Sed } 291193323Sed 292193323Sed const_iterator find(const char *KeyStart, const char *KeyEnd) const { 293193323Sed int Bucket = FindKey(KeyStart, KeyEnd); 294193323Sed if (Bucket == -1) return end(); 295193323Sed return const_iterator(TheTable+Bucket); 296193323Sed } 297193323Sed const_iterator find(const char *Key) const { 298193323Sed return find(Key, Key + strlen(Key)); 299193323Sed } 300193323Sed const_iterator find(const std::string &Key) const { 301193323Sed return find(Key.data(), Key.data() + Key.size()); 302193323Sed } 303193323Sed 304193323Sed /// lookup - Return the entry for the specified key, or a default 305193323Sed /// constructed value if no such entry exists. 306193323Sed ValueTy lookup(const char *KeyStart, const char *KeyEnd) const { 307193323Sed const_iterator it = find(KeyStart, KeyEnd); 308193323Sed if (it != end()) 309193323Sed return it->second; 310193323Sed return ValueTy(); 311193323Sed } 312193323Sed ValueTy lookup(const char *Key) const { 313193323Sed const_iterator it = find(Key); 314193323Sed if (it != end()) 315193323Sed return it->second; 316193323Sed return ValueTy(); 317193323Sed } 318193323Sed ValueTy lookup(const std::string &Key) const { 319193323Sed const_iterator it = find(Key); 320193323Sed if (it != end()) 321193323Sed return it->second; 322193323Sed return ValueTy(); 323193323Sed } 324193323Sed 325193323Sed ValueTy& operator[](const char *Key) { 326193323Sed return GetOrCreateValue(Key, Key + strlen(Key)).getValue(); 327193323Sed } 328193323Sed ValueTy& operator[](const std::string &Key) { 329193323Sed return GetOrCreateValue(Key.data(), Key.data() + Key.size()).getValue(); 330193323Sed } 331193323Sed 332193323Sed size_type count(const char *KeyStart, const char *KeyEnd) const { 333193323Sed return find(KeyStart, KeyEnd) == end() ? 0 : 1; 334193323Sed } 335193323Sed size_type count(const char *Key) const { 336193323Sed return count(Key, Key + strlen(Key)); 337193323Sed } 338193323Sed size_type count(const std::string &Key) const { 339193323Sed return count(Key.data(), Key.data() + Key.size()); 340193323Sed } 341193323Sed 342193323Sed /// insert - Insert the specified key/value pair into the map. If the key 343193323Sed /// already exists in the map, return false and ignore the request, otherwise 344193323Sed /// insert it and return true. 345193323Sed bool insert(MapEntryTy *KeyValue) { 346193323Sed unsigned BucketNo = 347193323Sed LookupBucketFor(KeyValue->getKeyData(), 348193323Sed KeyValue->getKeyData()+KeyValue->getKeyLength()); 349193323Sed ItemBucket &Bucket = TheTable[BucketNo]; 350193323Sed if (Bucket.Item && Bucket.Item != getTombstoneVal()) 351193323Sed return false; // Already exists in map. 352193323Sed 353193323Sed if (Bucket.Item == getTombstoneVal()) 354193323Sed --NumTombstones; 355193323Sed Bucket.Item = KeyValue; 356193323Sed ++NumItems; 357193323Sed 358193323Sed if (ShouldRehash()) 359193323Sed RehashTable(); 360193323Sed return true; 361193323Sed } 362193323Sed 363193323Sed // clear - Empties out the StringMap 364193323Sed void clear() { 365193323Sed if (empty()) return; 366193323Sed 367193323Sed // Zap all values, resetting the keys back to non-present (not tombstone), 368193323Sed // which is safe because we're removing all elements. 369193323Sed for (ItemBucket *I = TheTable, *E = TheTable+NumBuckets; I != E; ++I) { 370193323Sed if (I->Item && I->Item != getTombstoneVal()) { 371193323Sed static_cast<MapEntryTy*>(I->Item)->Destroy(Allocator); 372193323Sed I->Item = 0; 373193323Sed } 374193323Sed } 375193323Sed 376193323Sed NumItems = 0; 377193323Sed } 378193323Sed 379193323Sed /// GetOrCreateValue - Look up the specified key in the table. If a value 380193323Sed /// exists, return it. Otherwise, default construct a value, insert it, and 381193323Sed /// return. 382193323Sed template <typename InitTy> 383193323Sed StringMapEntry<ValueTy> &GetOrCreateValue(const char *KeyStart, 384193323Sed const char *KeyEnd, 385193323Sed InitTy Val) { 386193323Sed unsigned BucketNo = LookupBucketFor(KeyStart, KeyEnd); 387193323Sed ItemBucket &Bucket = TheTable[BucketNo]; 388193323Sed if (Bucket.Item && Bucket.Item != getTombstoneVal()) 389193323Sed return *static_cast<MapEntryTy*>(Bucket.Item); 390193323Sed 391193323Sed MapEntryTy *NewItem = MapEntryTy::Create(KeyStart, KeyEnd, Allocator, Val); 392193323Sed 393193323Sed if (Bucket.Item == getTombstoneVal()) 394193323Sed --NumTombstones; 395193323Sed ++NumItems; 396193323Sed 397193323Sed // Fill in the bucket for the hash table. The FullHashValue was already 398193323Sed // filled in by LookupBucketFor. 399193323Sed Bucket.Item = NewItem; 400193323Sed 401193323Sed if (ShouldRehash()) 402193323Sed RehashTable(); 403193323Sed return *NewItem; 404193323Sed } 405193323Sed 406193323Sed StringMapEntry<ValueTy> &GetOrCreateValue(const char *KeyStart, 407193323Sed const char *KeyEnd) { 408193323Sed return GetOrCreateValue(KeyStart, KeyEnd, ValueTy()); 409193323Sed } 410193323Sed 411193323Sed /// remove - Remove the specified key/value pair from the map, but do not 412193323Sed /// erase it. This aborts if the key is not in the map. 413193323Sed void remove(MapEntryTy *KeyValue) { 414193323Sed RemoveKey(KeyValue); 415193323Sed } 416193323Sed 417193323Sed void erase(iterator I) { 418193323Sed MapEntryTy &V = *I; 419193323Sed remove(&V); 420193323Sed V.Destroy(Allocator); 421193323Sed } 422193323Sed 423193323Sed bool erase(const char *Key) { 424193323Sed iterator I = find(Key); 425193323Sed if (I == end()) return false; 426193323Sed erase(I); 427193323Sed return true; 428193323Sed } 429193323Sed 430193323Sed bool erase(const std::string &Key) { 431193323Sed iterator I = find(Key); 432193323Sed if (I == end()) return false; 433193323Sed erase(I); 434193323Sed return true; 435193323Sed } 436193323Sed 437193323Sed ~StringMap() { 438193323Sed clear(); 439193323Sed free(TheTable); 440193323Sed } 441193323Sed}; 442193323Sed 443193323Sed 444193323Sedtemplate<typename ValueTy> 445193323Sedclass StringMapConstIterator { 446193323Sedprotected: 447193323Sed StringMapImpl::ItemBucket *Ptr; 448193323Sedpublic: 449193323Sed typedef StringMapEntry<ValueTy> value_type; 450193323Sed 451193323Sed explicit StringMapConstIterator(StringMapImpl::ItemBucket *Bucket, 452193323Sed bool NoAdvance = false) 453193323Sed : Ptr(Bucket) { 454193323Sed if (!NoAdvance) AdvancePastEmptyBuckets(); 455193323Sed } 456193323Sed 457193323Sed const value_type &operator*() const { 458193323Sed return *static_cast<StringMapEntry<ValueTy>*>(Ptr->Item); 459193323Sed } 460193323Sed const value_type *operator->() const { 461193323Sed return static_cast<StringMapEntry<ValueTy>*>(Ptr->Item); 462193323Sed } 463193323Sed 464193323Sed bool operator==(const StringMapConstIterator &RHS) const { 465193323Sed return Ptr == RHS.Ptr; 466193323Sed } 467193323Sed bool operator!=(const StringMapConstIterator &RHS) const { 468193323Sed return Ptr != RHS.Ptr; 469193323Sed } 470193323Sed 471193323Sed inline StringMapConstIterator& operator++() { // Preincrement 472193323Sed ++Ptr; 473193323Sed AdvancePastEmptyBuckets(); 474193323Sed return *this; 475193323Sed } 476193323Sed StringMapConstIterator operator++(int) { // Postincrement 477193323Sed StringMapConstIterator tmp = *this; ++*this; return tmp; 478193323Sed } 479193323Sed 480193323Sedprivate: 481193323Sed void AdvancePastEmptyBuckets() { 482193323Sed while (Ptr->Item == 0 || Ptr->Item == StringMapImpl::getTombstoneVal()) 483193323Sed ++Ptr; 484193323Sed } 485193323Sed}; 486193323Sed 487193323Sedtemplate<typename ValueTy> 488193323Sedclass StringMapIterator : public StringMapConstIterator<ValueTy> { 489193323Sedpublic: 490193323Sed explicit StringMapIterator(StringMapImpl::ItemBucket *Bucket, 491193323Sed bool NoAdvance = false) 492193323Sed : StringMapConstIterator<ValueTy>(Bucket, NoAdvance) { 493193323Sed } 494193323Sed StringMapEntry<ValueTy> &operator*() const { 495193323Sed return *static_cast<StringMapEntry<ValueTy>*>(this->Ptr->Item); 496193323Sed } 497193323Sed StringMapEntry<ValueTy> *operator->() const { 498193323Sed return static_cast<StringMapEntry<ValueTy>*>(this->Ptr->Item); 499193323Sed } 500193323Sed}; 501193323Sed 502193323Sed} 503193323Sed 504193323Sed#endif 505