1306196Sjkim//===--- StringMap.cpp - String Hash table map implementation -------------===//
2127131Snectar//
3127131Snectar//                     The LLVM Compiler Infrastructure
4142429Snectar//
5127131Snectar// This file is distributed under the University of Illinois Open Source
6127131Snectar// License. See LICENSE.TXT for details.
7127131Snectar//
8127131Snectar//===----------------------------------------------------------------------===//
9127131Snectar//
10127131Snectar// This file implements the StringMap class.
11127131Snectar//
12127131Snectar//===----------------------------------------------------------------------===//
13127131Snectar
14127131Snectar#include "llvm/ADT/StringMap.h"
15127131Snectar#include "llvm/ADT/StringExtras.h"
16127131Snectar#include "llvm/Support/Compiler.h"
17127131Snectar#include <cassert>
18127131Snectarusing namespace llvm;
19127131Snectar
20215698SsimonStringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
21215698Ssimon  ItemSize = itemSize;
22215698Ssimon
23215698Ssimon  // If a size is specified, initialize the table with that many buckets.
24215698Ssimon  if (InitSize) {
25127131Snectar    init(InitSize);
26127131Snectar    return;
27127131Snectar  }
28127131Snectar
29127131Snectar  // Otherwise, initialize it with zero buckets to avoid the allocation.
30127131Snectar  TheTable = 0;
31127131Snectar  NumBuckets = 0;
32127131Snectar  NumItems = 0;
33127131Snectar  NumTombstones = 0;
34127131Snectar}
35127131Snectar
36127131Snectarvoid StringMapImpl::init(unsigned InitSize) {
37127131Snectar  assert((InitSize & (InitSize-1)) == 0 &&
38127131Snectar         "Init Size must be a power of 2 or zero!");
39127131Snectar  NumBuckets = InitSize ? InitSize : 16;
40127131Snectar  NumItems = 0;
41276864Sjkim  NumTombstones = 0;
42276864Sjkim
43127131Snectar  TheTable = (StringMapEntryBase **)calloc(NumBuckets+1,
44127131Snectar                                           sizeof(StringMapEntryBase **) +
45215698Ssimon                                           sizeof(unsigned));
46215698Ssimon
47215698Ssimon  // Allocate one extra bucket, set it to look filled so the iterators stop at
48215698Ssimon  // end.
49142429Snectar  TheTable[NumBuckets] = (StringMapEntryBase*)2;
50215698Ssimon}
51142429Snectar
52142429Snectar
53276864Sjkim/// LookupBucketFor - Look up the bucket that the specified string should end
54276864Sjkim/// up in.  If it already exists as a key in the map, the Item pointer for the
55276864Sjkim/// specified bucket will be non-null.  Otherwise, it will be null.  In either
56127131Snectar/// case, the FullHashValue field of the bucket will be set to the hash value
57276864Sjkim/// of the string.
58276864Sjkimunsigned StringMapImpl::LookupBucketFor(StringRef Name) {
59276864Sjkim  unsigned HTSize = NumBuckets;
60276864Sjkim  if (HTSize == 0) {  // Hash table unallocated so far?
61276864Sjkim    init(16);
62276864Sjkim    HTSize = NumBuckets;
63215698Ssimon  }
64276864Sjkim  unsigned FullHashValue = HashString(Name);
65276864Sjkim  unsigned BucketNo = FullHashValue & (HTSize-1);
66276864Sjkim  unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
67276864Sjkim
68276864Sjkim  unsigned ProbeAmt = 1;
69215698Ssimon  int FirstTombstone = -1;
70276864Sjkim  while (1) {
71127131Snectar    StringMapEntryBase *BucketItem = TheTable[BucketNo];
72127131Snectar    // If we found an empty bucket, this key isn't in the table yet, return it.
73127131Snectar    if (LLVM_LIKELY(BucketItem == 0)) {
74127131Snectar      // If we found a tombstone, we want to reuse the tombstone instead of an
75127131Snectar      // empty bucket.  This reduces probing.
76127131Snectar      if (FirstTombstone != -1) {
77127131Snectar        HashTable[FirstTombstone] = FullHashValue;
78127131Snectar        return FirstTombstone;
79127131Snectar      }
80127131Snectar
81127131Snectar      HashTable[BucketNo] = FullHashValue;
82127131Snectar      return BucketNo;
83127131Snectar    }
84127131Snectar
85127131Snectar    if (BucketItem == getTombstoneVal()) {
86127131Snectar      // Skip over tombstones.  However, remember the first one we see.
87127131Snectar      if (FirstTombstone == -1) FirstTombstone = BucketNo;
88127131Snectar    } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
89127131Snectar      // If the full hash value matches, check deeply for a match.  The common
90127131Snectar      // case here is that we are only looking at the buckets (for item info
91127131Snectar      // being non-null and for the full hash value) not at the items.  This
92127131Snectar      // is important for cache locality.
93127131Snectar
94127131Snectar      // Do the comparison like this because Name isn't necessarily
95127131Snectar      // null-terminated!
96127131Snectar      char *ItemStr = (char*)BucketItem+ItemSize;
97127131Snectar      if (Name == StringRef(ItemStr, BucketItem->getKeyLength())) {
98127131Snectar        // We found a match!
99127131Snectar        return BucketNo;
100127131Snectar      }
101127131Snectar    }
102127131Snectar
103127131Snectar    // Okay, we didn't find the item.  Probe to the next bucket.
104127131Snectar    BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
105127131Snectar
106127131Snectar    // Use quadratic probing, it has fewer clumping artifacts than linear
107127131Snectar    // probing and has good cache behavior in the common case.
108127131Snectar    ++ProbeAmt;
109127131Snectar  }
110127131Snectar}
111127131Snectar
112127131Snectar
113127131Snectar/// FindKey - Look up the bucket that contains the specified key. If it exists
114127131Snectar/// in the map, return the bucket number of the key.  Otherwise return -1.
115127131Snectar/// This does not modify the map.
116127131Snectarint StringMapImpl::FindKey(StringRef Key) const {
117127131Snectar  unsigned HTSize = NumBuckets;
118127131Snectar  if (HTSize == 0) return -1;  // Really empty table?
119127131Snectar  unsigned FullHashValue = HashString(Key);
120127131Snectar  unsigned BucketNo = FullHashValue & (HTSize-1);
121127131Snectar  unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
122127131Snectar
123127131Snectar  unsigned ProbeAmt = 1;
124127131Snectar  while (1) {
125127131Snectar    StringMapEntryBase *BucketItem = TheTable[BucketNo];
126127131Snectar    // If we found an empty bucket, this key isn't in the table yet, return.
127127131Snectar    if (LLVM_LIKELY(BucketItem == 0))
128127131Snectar      return -1;
129127131Snectar
130127131Snectar    if (BucketItem == getTombstoneVal()) {
131127131Snectar      // Ignore tombstones.
132127131Snectar    } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) {
133142429Snectar      // If the full hash value matches, check deeply for a match.  The common
134127131Snectar      // case here is that we are only looking at the buckets (for item info
135127131Snectar      // being non-null and for the full hash value) not at the items.  This
136306196Sjkim      // is important for cache locality.
137215698Ssimon
138215698Ssimon      // Do the comparison like this because NameStart isn't necessarily
139215698Ssimon      // null-terminated!
140215698Ssimon      char *ItemStr = (char*)BucketItem+ItemSize;
141127131Snectar      if (Key == StringRef(ItemStr, BucketItem->getKeyLength())) {
142127131Snectar        // We found a match!
143215698Ssimon        return BucketNo;
144127131Snectar      }
145127131Snectar    }
146127131Snectar
147127131Snectar    // Okay, we didn't find the item.  Probe to the next bucket.
148127131Snectar    BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
149127131Snectar
150215698Ssimon    // Use quadratic probing, it has fewer clumping artifacts than linear
151127131Snectar    // probing and has good cache behavior in the common case.
152167616Ssimon    ++ProbeAmt;
153127131Snectar  }
154127131Snectar}
155127131Snectar
156127131Snectar/// RemoveKey - Remove the specified StringMapEntry from the table, but do not
157127131Snectar/// delete it.  This aborts if the value isn't in the table.
158127131Snectarvoid StringMapImpl::RemoveKey(StringMapEntryBase *V) {
159127131Snectar  const char *VStr = (char*)V + ItemSize;
160127131Snectar  StringMapEntryBase *V2 = RemoveKey(StringRef(VStr, V->getKeyLength()));
161127131Snectar  (void)V2;
162127131Snectar  assert(V == V2 && "Didn't find key?");
163127131Snectar}
164127131Snectar
165127131Snectar/// RemoveKey - Remove the StringMapEntry for the specified key from the
166127131Snectar/// table, returning it.  If the key is not in the table, this returns null.
167127131SnectarStringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
168127131Snectar  int Bucket = FindKey(Key);
169127131Snectar  if (Bucket == -1) return 0;
170127131Snectar
171127131Snectar  StringMapEntryBase *Result = TheTable[Bucket];
172127131Snectar  TheTable[Bucket] = getTombstoneVal();
173127131Snectar  --NumItems;
174127131Snectar  ++NumTombstones;
175142429Snectar  assert(NumItems + NumTombstones <= NumBuckets);
176267258Sjkim
177127131Snectar  return Result;
178127131Snectar}
179127131Snectar
180127131Snectar
181
182/// RehashTable - Grow the table, redistributing values into the buckets with
183/// the appropriate mod-of-hashtable-size.
184void StringMapImpl::RehashTable() {
185  unsigned NewSize;
186  unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
187
188  // If the hash table is now more than 3/4 full, or if fewer than 1/8 of
189  // the buckets are empty (meaning that many are filled with tombstones),
190  // grow/rehash the table.
191  if (NumItems*4 > NumBuckets*3) {
192    NewSize = NumBuckets*2;
193  } else if (NumBuckets-(NumItems+NumTombstones) <= NumBuckets/8) {
194    NewSize = NumBuckets;
195  } else {
196    return;
197  }
198
199  // Allocate one extra bucket which will always be non-empty.  This allows the
200  // iterators to stop at end.
201  StringMapEntryBase **NewTableArray =
202    (StringMapEntryBase **)calloc(NewSize+1, sizeof(StringMapEntryBase *) +
203                                             sizeof(unsigned));
204  unsigned *NewHashArray = (unsigned *)(NewTableArray + NewSize + 1);
205  NewTableArray[NewSize] = (StringMapEntryBase*)2;
206
207  // Rehash all the items into their new buckets.  Luckily :) we already have
208  // the hash values available, so we don't have to rehash any strings.
209  for (unsigned I = 0, E = NumBuckets; I != E; ++I) {
210    StringMapEntryBase *Bucket = TheTable[I];
211    if (Bucket && Bucket != getTombstoneVal()) {
212      // Fast case, bucket available.
213      unsigned FullHash = HashTable[I];
214      unsigned NewBucket = FullHash & (NewSize-1);
215      if (NewTableArray[NewBucket] == 0) {
216        NewTableArray[FullHash & (NewSize-1)] = Bucket;
217        NewHashArray[FullHash & (NewSize-1)] = FullHash;
218        continue;
219      }
220
221      // Otherwise probe for a spot.
222      unsigned ProbeSize = 1;
223      do {
224        NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
225      } while (NewTableArray[NewBucket]);
226
227      // Finally found a slot.  Fill it in.
228      NewTableArray[NewBucket] = Bucket;
229      NewHashArray[NewBucket] = FullHash;
230    }
231  }
232
233  free(TheTable);
234
235  TheTable = NewTableArray;
236  NumBuckets = NewSize;
237  NumTombstones = 0;
238}
239