StringMap.h revision 193323
1193323Sed//===--- StringMap.h - String Hash table map interface ----------*- C++ -*-===//
2193323Sed//
3193323Sed//                     The LLVM Compiler Infrastructure
4193323Sed//
5193323Sed// This file is distributed under the University of Illinois Open Source
6193323Sed// License. See LICENSE.TXT for details.
7193323Sed//
8193323Sed//===----------------------------------------------------------------------===//
9193323Sed//
10193323Sed// This file defines the StringMap class.
11193323Sed//
12193323Sed//===----------------------------------------------------------------------===//
13193323Sed
14193323Sed#ifndef LLVM_ADT_STRINGMAP_H
15193323Sed#define LLVM_ADT_STRINGMAP_H
16193323Sed
17193323Sed#include "llvm/Support/Allocator.h"
18193323Sed#include <cstring>
19193323Sed#include <string>
20193323Sed
21193323Sednamespace llvm {
22193323Sed  template<typename ValueT>
23193323Sed  class StringMapConstIterator;
24193323Sed  template<typename ValueT>
25193323Sed  class StringMapIterator;
26193323Sed  template<typename ValueTy>
27193323Sed  class StringMapEntry;
28193323Sed
29193323Sed/// StringMapEntryInitializer - This datatype can be partially specialized for
30193323Sed/// various datatypes in a stringmap to allow them to be initialized when an
31193323Sed/// entry is default constructed for the map.
32193323Sedtemplate<typename ValueTy>
33193323Sedclass StringMapEntryInitializer {
34193323Sedpublic:
35193323Sed  template <typename InitTy>
36193323Sed  static void Initialize(StringMapEntry<ValueTy> &T, InitTy InitVal) {
37193323Sed    T.second = InitVal;
38193323Sed  }
39193323Sed};
40193323Sed
41193323Sed
42193323Sed/// StringMapEntryBase - Shared base class of StringMapEntry instances.
43193323Sedclass StringMapEntryBase {
44193323Sed  unsigned StrLen;
45193323Sedpublic:
46193323Sed  explicit StringMapEntryBase(unsigned Len) : StrLen(Len) {}
47193323Sed
48193323Sed  unsigned getKeyLength() const { return StrLen; }
49193323Sed};
50193323Sed
51193323Sed/// StringMapImpl - This is the base class of StringMap that is shared among
52193323Sed/// all of its instantiations.
53193323Sedclass StringMapImpl {
54193323Sedpublic:
55193323Sed  /// ItemBucket - The hash table consists of an array of these.  If Item is
56193323Sed  /// non-null, this is an extant entry, otherwise, it is a hole.
57193323Sed  struct ItemBucket {
58193323Sed    /// FullHashValue - This remembers the full hash value of the key for
59193323Sed    /// easy scanning.
60193323Sed    unsigned FullHashValue;
61193323Sed
62193323Sed    /// Item - This is a pointer to the actual item object.
63193323Sed    StringMapEntryBase *Item;
64193323Sed  };
65193323Sed
66193323Sedprotected:
67193323Sed  ItemBucket *TheTable;
68193323Sed  unsigned NumBuckets;
69193323Sed  unsigned NumItems;
70193323Sed  unsigned NumTombstones;
71193323Sed  unsigned ItemSize;
72193323Sedprotected:
73193323Sed  explicit StringMapImpl(unsigned itemSize) : ItemSize(itemSize) {
74193323Sed    // Initialize the map with zero buckets to allocation.
75193323Sed    TheTable = 0;
76193323Sed    NumBuckets = 0;
77193323Sed    NumItems = 0;
78193323Sed    NumTombstones = 0;
79193323Sed  }
80193323Sed  StringMapImpl(unsigned InitSize, unsigned ItemSize);
81193323Sed  void RehashTable();
82193323Sed
83193323Sed  /// ShouldRehash - Return true if the table should be rehashed after a new
84193323Sed  /// element was recently inserted.
85193323Sed  bool ShouldRehash() const {
86193323Sed    // If the hash table is now more than 3/4 full, or if fewer than 1/8 of
87193323Sed    // the buckets are empty (meaning that many are filled with tombstones),
88193323Sed    // grow the table.
89193323Sed    return NumItems*4 > NumBuckets*3 ||
90193323Sed           NumBuckets-(NumItems+NumTombstones) < NumBuckets/8;
91193323Sed  }
92193323Sed
93193323Sed  /// LookupBucketFor - Look up the bucket that the specified string should end
94193323Sed  /// up in.  If it already exists as a key in the map, the Item pointer for the
95193323Sed  /// specified bucket will be non-null.  Otherwise, it will be null.  In either
96193323Sed  /// case, the FullHashValue field of the bucket will be set to the hash value
97193323Sed  /// of the string.
98193323Sed  unsigned LookupBucketFor(const char *KeyStart, const char *KeyEnd);
99193323Sed
100193323Sed  /// FindKey - Look up the bucket that contains the specified key. If it exists
101193323Sed  /// in the map, return the bucket number of the key.  Otherwise return -1.
102193323Sed  /// This does not modify the map.
103193323Sed  int FindKey(const char *KeyStart, const char *KeyEnd) const;
104193323Sed
105193323Sed  /// RemoveKey - Remove the specified StringMapEntry from the table, but do not
106193323Sed  /// delete it.  This aborts if the value isn't in the table.
107193323Sed  void RemoveKey(StringMapEntryBase *V);
108193323Sed
109193323Sed  /// RemoveKey - Remove the StringMapEntry for the specified key from the
110193323Sed  /// table, returning it.  If the key is not in the table, this returns null.
111193323Sed  StringMapEntryBase *RemoveKey(const char *KeyStart, const char *KeyEnd);
112193323Sedprivate:
113193323Sed  void init(unsigned Size);
114193323Sedpublic:
115193323Sed  static StringMapEntryBase *getTombstoneVal() {
116193323Sed    return (StringMapEntryBase*)-1;
117193323Sed  }
118193323Sed
119193323Sed  unsigned getNumBuckets() const { return NumBuckets; }
120193323Sed  unsigned getNumItems() const { return NumItems; }
121193323Sed
122193323Sed  bool empty() const { return NumItems == 0; }
123193323Sed  unsigned size() const { return NumItems; }
124193323Sed};
125193323Sed
126193323Sed/// StringMapEntry - This is used to represent one value that is inserted into
127193323Sed/// a StringMap.  It contains the Value itself and the key: the string length
128193323Sed/// and data.
129193323Sedtemplate<typename ValueTy>
130193323Sedclass StringMapEntry : public StringMapEntryBase {
131193323Sedpublic:
132193323Sed  ValueTy second;
133193323Sed
134193323Sed  explicit StringMapEntry(unsigned strLen)
135193323Sed    : StringMapEntryBase(strLen), second() {}
136193323Sed  StringMapEntry(unsigned strLen, const ValueTy &V)
137193323Sed    : StringMapEntryBase(strLen), second(V) {}
138193323Sed
139193323Sed  const ValueTy &getValue() const { return second; }
140193323Sed  ValueTy &getValue() { return second; }
141193323Sed
142193323Sed  void setValue(const ValueTy &V) { second = V; }
143193323Sed
144193323Sed  /// getKeyData - Return the start of the string data that is the key for this
145193323Sed  /// value.  The string data is always stored immediately after the
146193323Sed  /// StringMapEntry object.
147193323Sed  const char *getKeyData() const {return reinterpret_cast<const char*>(this+1);}
148193323Sed
149193323Sed  const char *first() const { return getKeyData(); }
150193323Sed
151193323Sed  /// Create - Create a StringMapEntry for the specified key and default
152193323Sed  /// construct the value.
153193323Sed  template<typename AllocatorTy, typename InitType>
154193323Sed  static StringMapEntry *Create(const char *KeyStart, const char *KeyEnd,
155193323Sed                                AllocatorTy &Allocator,
156193323Sed                                InitType InitVal) {
157193323Sed    unsigned KeyLength = static_cast<unsigned>(KeyEnd-KeyStart);
158193323Sed
159193323Sed    // Okay, the item doesn't already exist, and 'Bucket' is the bucket to fill
160193323Sed    // in.  Allocate a new item with space for the string at the end and a null
161193323Sed    // terminator.
162193323Sed
163193323Sed    unsigned AllocSize = static_cast<unsigned>(sizeof(StringMapEntry))+
164193323Sed      KeyLength+1;
165193323Sed    unsigned Alignment = alignof<StringMapEntry>();
166193323Sed
167193323Sed    StringMapEntry *NewItem =
168193323Sed      static_cast<StringMapEntry*>(Allocator.Allocate(AllocSize,Alignment));
169193323Sed
170193323Sed    // Default construct the value.
171193323Sed    new (NewItem) StringMapEntry(KeyLength);
172193323Sed
173193323Sed    // Copy the string information.
174193323Sed    char *StrBuffer = const_cast<char*>(NewItem->getKeyData());
175193323Sed    memcpy(StrBuffer, KeyStart, KeyLength);
176193323Sed    StrBuffer[KeyLength] = 0;  // Null terminate for convenience of clients.
177193323Sed
178193323Sed    // Initialize the value if the client wants to.
179193323Sed    StringMapEntryInitializer<ValueTy>::Initialize(*NewItem, InitVal);
180193323Sed    return NewItem;
181193323Sed  }
182193323Sed
183193323Sed  template<typename AllocatorTy>
184193323Sed  static StringMapEntry *Create(const char *KeyStart, const char *KeyEnd,
185193323Sed                                AllocatorTy &Allocator) {
186193323Sed    return Create(KeyStart, KeyEnd, Allocator, 0);
187193323Sed  }
188193323Sed
189193323Sed
190193323Sed  /// Create - Create a StringMapEntry with normal malloc/free.
191193323Sed  template<typename InitType>
192193323Sed  static StringMapEntry *Create(const char *KeyStart, const char *KeyEnd,
193193323Sed                                InitType InitVal) {
194193323Sed    MallocAllocator A;
195193323Sed    return Create(KeyStart, KeyEnd, A, InitVal);
196193323Sed  }
197193323Sed
198193323Sed  static StringMapEntry *Create(const char *KeyStart, const char *KeyEnd) {
199193323Sed    return Create(KeyStart, KeyEnd, ValueTy());
200193323Sed  }
201193323Sed
202193323Sed  /// GetStringMapEntryFromValue - Given a value that is known to be embedded
203193323Sed  /// into a StringMapEntry, return the StringMapEntry itself.
204193323Sed  static StringMapEntry &GetStringMapEntryFromValue(ValueTy &V) {
205193323Sed    StringMapEntry *EPtr = 0;
206193323Sed    char *Ptr = reinterpret_cast<char*>(&V) -
207193323Sed                  (reinterpret_cast<char*>(&EPtr->second) -
208193323Sed                   reinterpret_cast<char*>(EPtr));
209193323Sed    return *reinterpret_cast<StringMapEntry*>(Ptr);
210193323Sed  }
211193323Sed  static const StringMapEntry &GetStringMapEntryFromValue(const ValueTy &V) {
212193323Sed    return GetStringMapEntryFromValue(const_cast<ValueTy&>(V));
213193323Sed  }
214193323Sed
215193323Sed  /// Destroy - Destroy this StringMapEntry, releasing memory back to the
216193323Sed  /// specified allocator.
217193323Sed  template<typename AllocatorTy>
218193323Sed  void Destroy(AllocatorTy &Allocator) {
219193323Sed    // Free memory referenced by the item.
220193323Sed    this->~StringMapEntry();
221193323Sed    Allocator.Deallocate(this);
222193323Sed  }
223193323Sed
224193323Sed  /// Destroy this object, releasing memory back to the malloc allocator.
225193323Sed  void Destroy() {
226193323Sed    MallocAllocator A;
227193323Sed    Destroy(A);
228193323Sed  }
229193323Sed};
230193323Sed
231193323Sed
232193323Sed/// StringMap - This is an unconventional map that is specialized for handling
233193323Sed/// keys that are "strings", which are basically ranges of bytes. This does some
234193323Sed/// funky memory allocation and hashing things to make it extremely efficient,
235193323Sed/// storing the string data *after* the value in the map.
236193323Sedtemplate<typename ValueTy, typename AllocatorTy = MallocAllocator>
237193323Sedclass StringMap : public StringMapImpl {
238193323Sed  AllocatorTy Allocator;
239193323Sed  typedef StringMapEntry<ValueTy> MapEntryTy;
240193323Sedpublic:
241193323Sed  StringMap() : StringMapImpl(static_cast<unsigned>(sizeof(MapEntryTy))) {}
242193323Sed  explicit StringMap(unsigned InitialSize)
243193323Sed    : StringMapImpl(InitialSize, static_cast<unsigned>(sizeof(MapEntryTy))) {}
244193323Sed  explicit StringMap(const StringMap &RHS)
245193323Sed    : StringMapImpl(static_cast<unsigned>(sizeof(MapEntryTy))) {
246193323Sed    assert(RHS.empty() &&
247193323Sed           "Copy ctor from non-empty stringmap not implemented yet!");
248193323Sed  }
249193323Sed  void operator=(const StringMap &RHS) {
250193323Sed    assert(RHS.empty() &&
251193323Sed           "assignment from non-empty stringmap not implemented yet!");
252193323Sed    clear();
253193323Sed  }
254193323Sed
255193323Sed
256193323Sed  AllocatorTy &getAllocator() { return Allocator; }
257193323Sed  const AllocatorTy &getAllocator() const { return Allocator; }
258193323Sed
259193323Sed  typedef const char* key_type;
260193323Sed  typedef ValueTy mapped_type;
261193323Sed  typedef StringMapEntry<ValueTy> value_type;
262193323Sed  typedef size_t size_type;
263193323Sed
264193323Sed  typedef StringMapConstIterator<ValueTy> const_iterator;
265193323Sed  typedef StringMapIterator<ValueTy> iterator;
266193323Sed
267193323Sed  iterator begin() {
268193323Sed    return iterator(TheTable, NumBuckets == 0);
269193323Sed  }
270193323Sed  iterator end() {
271193323Sed    return iterator(TheTable+NumBuckets, true);
272193323Sed  }
273193323Sed  const_iterator begin() const {
274193323Sed    return const_iterator(TheTable, NumBuckets == 0);
275193323Sed  }
276193323Sed  const_iterator end() const {
277193323Sed    return const_iterator(TheTable+NumBuckets, true);
278193323Sed  }
279193323Sed
280193323Sed  iterator find(const char *KeyStart, const char *KeyEnd) {
281193323Sed    int Bucket = FindKey(KeyStart, KeyEnd);
282193323Sed    if (Bucket == -1) return end();
283193323Sed    return iterator(TheTable+Bucket);
284193323Sed  }
285193323Sed  iterator find(const char *Key) {
286193323Sed    return find(Key, Key + strlen(Key));
287193323Sed  }
288193323Sed  iterator find(const std::string &Key) {
289193323Sed    return find(Key.data(), Key.data() + Key.size());
290193323Sed  }
291193323Sed
292193323Sed  const_iterator find(const char *KeyStart, const char *KeyEnd) const {
293193323Sed    int Bucket = FindKey(KeyStart, KeyEnd);
294193323Sed    if (Bucket == -1) return end();
295193323Sed    return const_iterator(TheTable+Bucket);
296193323Sed  }
297193323Sed  const_iterator find(const char *Key) const {
298193323Sed    return find(Key, Key + strlen(Key));
299193323Sed  }
300193323Sed  const_iterator find(const std::string &Key) const {
301193323Sed    return find(Key.data(), Key.data() + Key.size());
302193323Sed  }
303193323Sed
304193323Sed   /// lookup - Return the entry for the specified key, or a default
305193323Sed  /// constructed value if no such entry exists.
306193323Sed  ValueTy lookup(const char *KeyStart, const char *KeyEnd) const {
307193323Sed    const_iterator it = find(KeyStart, KeyEnd);
308193323Sed    if (it != end())
309193323Sed      return it->second;
310193323Sed    return ValueTy();
311193323Sed  }
312193323Sed  ValueTy lookup(const char *Key) const {
313193323Sed    const_iterator it = find(Key);
314193323Sed    if (it != end())
315193323Sed      return it->second;
316193323Sed    return ValueTy();
317193323Sed  }
318193323Sed  ValueTy lookup(const std::string &Key) const {
319193323Sed    const_iterator it = find(Key);
320193323Sed    if (it != end())
321193323Sed      return it->second;
322193323Sed    return ValueTy();
323193323Sed  }
324193323Sed
325193323Sed  ValueTy& operator[](const char *Key) {
326193323Sed    return GetOrCreateValue(Key, Key + strlen(Key)).getValue();
327193323Sed  }
328193323Sed  ValueTy& operator[](const std::string &Key) {
329193323Sed    return GetOrCreateValue(Key.data(), Key.data() + Key.size()).getValue();
330193323Sed  }
331193323Sed
332193323Sed  size_type count(const char *KeyStart, const char *KeyEnd) const {
333193323Sed    return find(KeyStart, KeyEnd) == end() ? 0 : 1;
334193323Sed  }
335193323Sed  size_type count(const char *Key) const {
336193323Sed    return count(Key, Key + strlen(Key));
337193323Sed  }
338193323Sed  size_type count(const std::string &Key) const {
339193323Sed    return count(Key.data(), Key.data() + Key.size());
340193323Sed  }
341193323Sed
342193323Sed  /// insert - Insert the specified key/value pair into the map.  If the key
343193323Sed  /// already exists in the map, return false and ignore the request, otherwise
344193323Sed  /// insert it and return true.
345193323Sed  bool insert(MapEntryTy *KeyValue) {
346193323Sed    unsigned BucketNo =
347193323Sed      LookupBucketFor(KeyValue->getKeyData(),
348193323Sed                      KeyValue->getKeyData()+KeyValue->getKeyLength());
349193323Sed    ItemBucket &Bucket = TheTable[BucketNo];
350193323Sed    if (Bucket.Item && Bucket.Item != getTombstoneVal())
351193323Sed      return false;  // Already exists in map.
352193323Sed
353193323Sed    if (Bucket.Item == getTombstoneVal())
354193323Sed      --NumTombstones;
355193323Sed    Bucket.Item = KeyValue;
356193323Sed    ++NumItems;
357193323Sed
358193323Sed    if (ShouldRehash())
359193323Sed      RehashTable();
360193323Sed    return true;
361193323Sed  }
362193323Sed
363193323Sed  // clear - Empties out the StringMap
364193323Sed  void clear() {
365193323Sed    if (empty()) return;
366193323Sed
367193323Sed    // Zap all values, resetting the keys back to non-present (not tombstone),
368193323Sed    // which is safe because we're removing all elements.
369193323Sed    for (ItemBucket *I = TheTable, *E = TheTable+NumBuckets; I != E; ++I) {
370193323Sed      if (I->Item && I->Item != getTombstoneVal()) {
371193323Sed        static_cast<MapEntryTy*>(I->Item)->Destroy(Allocator);
372193323Sed        I->Item = 0;
373193323Sed      }
374193323Sed    }
375193323Sed
376193323Sed    NumItems = 0;
377193323Sed  }
378193323Sed
379193323Sed  /// GetOrCreateValue - Look up the specified key in the table.  If a value
380193323Sed  /// exists, return it.  Otherwise, default construct a value, insert it, and
381193323Sed  /// return.
382193323Sed  template <typename InitTy>
383193323Sed  StringMapEntry<ValueTy> &GetOrCreateValue(const char *KeyStart,
384193323Sed                                            const char *KeyEnd,
385193323Sed                                            InitTy Val) {
386193323Sed    unsigned BucketNo = LookupBucketFor(KeyStart, KeyEnd);
387193323Sed    ItemBucket &Bucket = TheTable[BucketNo];
388193323Sed    if (Bucket.Item && Bucket.Item != getTombstoneVal())
389193323Sed      return *static_cast<MapEntryTy*>(Bucket.Item);
390193323Sed
391193323Sed    MapEntryTy *NewItem = MapEntryTy::Create(KeyStart, KeyEnd, Allocator, Val);
392193323Sed
393193323Sed    if (Bucket.Item == getTombstoneVal())
394193323Sed      --NumTombstones;
395193323Sed    ++NumItems;
396193323Sed
397193323Sed    // Fill in the bucket for the hash table.  The FullHashValue was already
398193323Sed    // filled in by LookupBucketFor.
399193323Sed    Bucket.Item = NewItem;
400193323Sed
401193323Sed    if (ShouldRehash())
402193323Sed      RehashTable();
403193323Sed    return *NewItem;
404193323Sed  }
405193323Sed
406193323Sed  StringMapEntry<ValueTy> &GetOrCreateValue(const char *KeyStart,
407193323Sed                                            const char *KeyEnd) {
408193323Sed    return GetOrCreateValue(KeyStart, KeyEnd, ValueTy());
409193323Sed  }
410193323Sed
411193323Sed  /// remove - Remove the specified key/value pair from the map, but do not
412193323Sed  /// erase it.  This aborts if the key is not in the map.
413193323Sed  void remove(MapEntryTy *KeyValue) {
414193323Sed    RemoveKey(KeyValue);
415193323Sed  }
416193323Sed
417193323Sed  void erase(iterator I) {
418193323Sed    MapEntryTy &V = *I;
419193323Sed    remove(&V);
420193323Sed    V.Destroy(Allocator);
421193323Sed  }
422193323Sed
423193323Sed  bool erase(const char *Key) {
424193323Sed    iterator I = find(Key);
425193323Sed    if (I == end()) return false;
426193323Sed    erase(I);
427193323Sed    return true;
428193323Sed  }
429193323Sed
430193323Sed  bool erase(const std::string &Key) {
431193323Sed    iterator I = find(Key);
432193323Sed    if (I == end()) return false;
433193323Sed    erase(I);
434193323Sed    return true;
435193323Sed  }
436193323Sed
437193323Sed  ~StringMap() {
438193323Sed    clear();
439193323Sed    free(TheTable);
440193323Sed  }
441193323Sed};
442193323Sed
443193323Sed
444193323Sedtemplate<typename ValueTy>
445193323Sedclass StringMapConstIterator {
446193323Sedprotected:
447193323Sed  StringMapImpl::ItemBucket *Ptr;
448193323Sedpublic:
449193323Sed  typedef StringMapEntry<ValueTy> value_type;
450193323Sed
451193323Sed  explicit StringMapConstIterator(StringMapImpl::ItemBucket *Bucket,
452193323Sed                                  bool NoAdvance = false)
453193323Sed  : Ptr(Bucket) {
454193323Sed    if (!NoAdvance) AdvancePastEmptyBuckets();
455193323Sed  }
456193323Sed
457193323Sed  const value_type &operator*() const {
458193323Sed    return *static_cast<StringMapEntry<ValueTy>*>(Ptr->Item);
459193323Sed  }
460193323Sed  const value_type *operator->() const {
461193323Sed    return static_cast<StringMapEntry<ValueTy>*>(Ptr->Item);
462193323Sed  }
463193323Sed
464193323Sed  bool operator==(const StringMapConstIterator &RHS) const {
465193323Sed    return Ptr == RHS.Ptr;
466193323Sed  }
467193323Sed  bool operator!=(const StringMapConstIterator &RHS) const {
468193323Sed    return Ptr != RHS.Ptr;
469193323Sed  }
470193323Sed
471193323Sed  inline StringMapConstIterator& operator++() {          // Preincrement
472193323Sed    ++Ptr;
473193323Sed    AdvancePastEmptyBuckets();
474193323Sed    return *this;
475193323Sed  }
476193323Sed  StringMapConstIterator operator++(int) {        // Postincrement
477193323Sed    StringMapConstIterator tmp = *this; ++*this; return tmp;
478193323Sed  }
479193323Sed
480193323Sedprivate:
481193323Sed  void AdvancePastEmptyBuckets() {
482193323Sed    while (Ptr->Item == 0 || Ptr->Item == StringMapImpl::getTombstoneVal())
483193323Sed      ++Ptr;
484193323Sed  }
485193323Sed};
486193323Sed
487193323Sedtemplate<typename ValueTy>
488193323Sedclass StringMapIterator : public StringMapConstIterator<ValueTy> {
489193323Sedpublic:
490193323Sed  explicit StringMapIterator(StringMapImpl::ItemBucket *Bucket,
491193323Sed                             bool NoAdvance = false)
492193323Sed    : StringMapConstIterator<ValueTy>(Bucket, NoAdvance) {
493193323Sed  }
494193323Sed  StringMapEntry<ValueTy> &operator*() const {
495193323Sed    return *static_cast<StringMapEntry<ValueTy>*>(this->Ptr->Item);
496193323Sed  }
497193323Sed  StringMapEntry<ValueTy> *operator->() const {
498193323Sed    return static_cast<StringMapEntry<ValueTy>*>(this->Ptr->Item);
499193323Sed  }
500193323Sed};
501193323Sed
502193323Sed}
503193323Sed
504193323Sed#endif
505