FoldingSet.cpp revision 226633
1189251Ssam//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===//
2189251Ssam//
3189251Ssam//                     The LLVM Compiler Infrastructure
4189251Ssam//
5252726Srpaulo// This file is distributed under the University of Illinois Open Source
6252726Srpaulo// License. See LICENSE.TXT for details.
7189251Ssam//
8189251Ssam//===----------------------------------------------------------------------===//
9189251Ssam//
10189251Ssam// This file implements a hash set that can be used to remove duplication of
11189251Ssam// nodes in a graph.  This code was originally created by Chris Lattner for use
12189251Ssam// with SelectionDAGCSEMap, but was isolated to provide use across the llvm code
13189251Ssam// set.
14189251Ssam//
15189251Ssam//===----------------------------------------------------------------------===//
16189251Ssam
17189251Ssam#include "llvm/ADT/FoldingSet.h"
18214734Srpaulo#include "llvm/Support/Allocator.h"
19189251Ssam#include "llvm/Support/ErrorHandling.h"
20189251Ssam#include "llvm/Support/MathExtras.h"
21214734Srpaulo#include "llvm/Support/Host.h"
22214734Srpaulo#include <cassert>
23189251Ssam#include <cstring>
24189251Ssamusing namespace llvm;
25189251Ssam
26189251Ssam//===----------------------------------------------------------------------===//
27189251Ssam// FoldingSetNodeIDRef Implementation
28189251Ssam
29189251Ssam/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
30189251Ssam/// used to lookup the node in the FoldingSetImpl.
31189251Ssamunsigned FoldingSetNodeIDRef::ComputeHash() const {
32189251Ssam  // This is adapted from SuperFastHash by Paul Hsieh.
33189251Ssam  unsigned Hash = static_cast<unsigned>(Size);
34189251Ssam  for (const unsigned *BP = Data, *E = BP+Size; BP != E; ++BP) {
35189251Ssam    unsigned Data = *BP;
36189251Ssam    Hash         += Data & 0xFFFF;
37189251Ssam    unsigned Tmp  = ((Data >> 16) << 11) ^ Hash;
38189251Ssam    Hash          = (Hash << 16) ^ Tmp;
39189251Ssam    Hash         += Hash >> 11;
40189251Ssam  }
41189251Ssam
42189251Ssam  // Force "avalanching" of final 127 bits.
43189251Ssam  Hash ^= Hash << 3;
44189251Ssam  Hash += Hash >> 5;
45189251Ssam  Hash ^= Hash << 4;
46189251Ssam  Hash += Hash >> 17;
47189251Ssam  Hash ^= Hash << 25;
48189251Ssam  Hash += Hash >> 6;
49189251Ssam  return Hash;
50189251Ssam}
51189251Ssam
52189251Ssambool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const {
53189251Ssam  if (Size != RHS.Size) return false;
54189251Ssam  return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0;
55189251Ssam}
56189251Ssam
57189251Ssam//===----------------------------------------------------------------------===//
58189251Ssam// FoldingSetNodeID Implementation
59189251Ssam
60189251Ssam/// Add* - Add various data types to Bit data.
61189251Ssam///
62189251Ssamvoid FoldingSetNodeID::AddPointer(const void *Ptr) {
63189251Ssam  // Note: this adds pointers to the hash using sizes and endianness that
64189251Ssam  // depend on the host.  It doesn't matter however, because hashing on
65189251Ssam  // pointer values in inherently unstable.  Nothing  should depend on the
66189251Ssam  // ordering of nodes in the folding set.
67189251Ssam  Bits.append(reinterpret_cast<unsigned *>(&Ptr),
68189251Ssam              reinterpret_cast<unsigned *>(&Ptr+1));
69189251Ssam}
70189251Ssamvoid FoldingSetNodeID::AddInteger(signed I) {
71189251Ssam  Bits.push_back(I);
72189251Ssam}
73189251Ssamvoid FoldingSetNodeID::AddInteger(unsigned I) {
74189251Ssam  Bits.push_back(I);
75189251Ssam}
76189251Ssamvoid FoldingSetNodeID::AddInteger(long I) {
77189251Ssam  AddInteger((unsigned long)I);
78189251Ssam}
79189251Ssamvoid FoldingSetNodeID::AddInteger(unsigned long I) {
80189251Ssam  if (sizeof(long) == sizeof(int))
81189251Ssam    AddInteger(unsigned(I));
82189251Ssam  else if (sizeof(long) == sizeof(long long)) {
83189251Ssam    AddInteger((unsigned long long)I);
84189251Ssam  } else {
85189251Ssam    llvm_unreachable("unexpected sizeof(long)");
86189251Ssam  }
87189251Ssam}
88189251Ssamvoid FoldingSetNodeID::AddInteger(long long I) {
89189251Ssam  AddInteger((unsigned long long)I);
90189251Ssam}
91189251Ssamvoid FoldingSetNodeID::AddInteger(unsigned long long I) {
92189251Ssam  AddInteger(unsigned(I));
93189251Ssam  if ((uint64_t)(unsigned)I != I)
94189251Ssam    Bits.push_back(unsigned(I >> 32));
95189251Ssam}
96189251Ssam
97189251Ssamvoid FoldingSetNodeID::AddString(StringRef String) {
98214734Srpaulo  unsigned Size =  String.size();
99214734Srpaulo  Bits.push_back(Size);
100189251Ssam  if (!Size) return;
101189251Ssam
102189251Ssam  unsigned Units = Size / 4;
103214734Srpaulo  unsigned Pos = 0;
104214734Srpaulo  const unsigned *Base = (const unsigned*) String.data();
105214734Srpaulo
106214734Srpaulo  // If the string is aligned do a bulk transfer.
107214734Srpaulo  if (!((intptr_t)Base & 3)) {
108214734Srpaulo    Bits.append(Base, Base + Units);
109214734Srpaulo    Pos = (Units + 1) * 4;
110214734Srpaulo  } else {
111214734Srpaulo    // Otherwise do it the hard way.
112189251Ssam    // To be compatible with above bulk transfer, we need to take endianness
113189251Ssam    // into account.
114189251Ssam    if (sys::isBigEndianHost()) {
115189251Ssam      for (Pos += 4; Pos <= Size; Pos += 4) {
116189251Ssam        unsigned V = ((unsigned char)String[Pos - 4] << 24) |
117189251Ssam                     ((unsigned char)String[Pos - 3] << 16) |
118189251Ssam                     ((unsigned char)String[Pos - 2] << 8) |
119189251Ssam                      (unsigned char)String[Pos - 1];
120189251Ssam        Bits.push_back(V);
121189251Ssam      }
122189251Ssam    } else {
123189251Ssam      assert(sys::isLittleEndianHost() && "Unexpected host endianness");
124189251Ssam      for (Pos += 4; Pos <= Size; Pos += 4) {
125189251Ssam        unsigned V = ((unsigned char)String[Pos - 1] << 24) |
126189251Ssam                     ((unsigned char)String[Pos - 2] << 16) |
127189251Ssam                     ((unsigned char)String[Pos - 3] << 8) |
128189251Ssam                      (unsigned char)String[Pos - 4];
129189251Ssam        Bits.push_back(V);
130189251Ssam      }
131189251Ssam    }
132189251Ssam  }
133189251Ssam
134189251Ssam  // With the leftover bits.
135189251Ssam  unsigned V = 0;
136189251Ssam  // Pos will have overshot size by 4 - #bytes left over.
137189251Ssam  // No need to take endianness into account here - this is always executed.
138189251Ssam  switch (Pos - Size) {
139189251Ssam  case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru.
140189251Ssam  case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru.
141189251Ssam  case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break;
142189251Ssam  default: return; // Nothing left.
143189251Ssam  }
144189251Ssam
145189251Ssam  Bits.push_back(V);
146189251Ssam}
147189251Ssam
148189251Ssam// AddNodeID - Adds the Bit data of another ID to *this.
149189251Ssamvoid FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) {
150189251Ssam  Bits.append(ID.Bits.begin(), ID.Bits.end());
151209158Srpaulo}
152189251Ssam
153189251Ssam/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
154189251Ssam/// lookup the node in the FoldingSetImpl.
155189251Ssamunsigned FoldingSetNodeID::ComputeHash() const {
156209158Srpaulo  return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
157189251Ssam}
158189251Ssam
159189251Ssam/// operator== - Used to compare two nodes to each other.
160189251Ssam///
161189251Ssambool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS)const{
162189251Ssam  return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
163189251Ssam}
164189251Ssam
165189251Ssam/// operator== - Used to compare two nodes to each other.
166189251Ssam///
167189251Ssambool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const {
168189251Ssam  return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS;
169189251Ssam}
170189251Ssam
171189251Ssam/// Intern - Copy this node's data to a memory region allocated from the
172189251Ssam/// given allocator and return a FoldingSetNodeIDRef describing the
173189251Ssam/// interned data.
174189251SsamFoldingSetNodeIDRef
175189251SsamFoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const {
176189251Ssam  unsigned *New = Allocator.Allocate<unsigned>(Bits.size());
177189251Ssam  std::uninitialized_copy(Bits.begin(), Bits.end(), New);
178189251Ssam  return FoldingSetNodeIDRef(New, Bits.size());
179189251Ssam}
180189251Ssam
181189251Ssam//===----------------------------------------------------------------------===//
182189251Ssam/// Helper functions for FoldingSetImpl.
183189251Ssam
184189251Ssam/// GetNextPtr - In order to save space, each bucket is a
185189251Ssam/// singly-linked-list. In order to make deletion more efficient, we make
186189251Ssam/// the list circular, so we can delete a node without computing its hash.
187189251Ssam/// The problem with this is that the start of the hash buckets are not
188189251Ssam/// Nodes.  If NextInBucketPtr is a bucket pointer, this method returns null:
189189251Ssam/// use GetBucketPtr when this happens.
190189251Ssamstatic FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) {
191189251Ssam  // The low bit is set if this is the pointer back to the bucket.
192189251Ssam  if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1)
193189251Ssam    return 0;
194189251Ssam
195189251Ssam  return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr);
196189251Ssam}
197189251Ssam
198189251Ssam
199189251Ssam/// testing.
200189251Ssamstatic void **GetBucketPtr(void *NextInBucketPtr) {
201189251Ssam  intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr);
202189251Ssam  assert((Ptr & 1) && "Not a bucket pointer");
203189251Ssam  return reinterpret_cast<void**>(Ptr & ~intptr_t(1));
204189251Ssam}
205189251Ssam
206189251Ssam/// GetBucketFor - Hash the specified node ID and return the hash bucket for
207189251Ssam/// the specified ID.
208189251Ssamstatic void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) {
209189251Ssam  // NumBuckets is always a power of 2.
210189251Ssam  unsigned BucketNum = Hash & (NumBuckets-1);
211189251Ssam  return Buckets + BucketNum;
212189251Ssam}
213189251Ssam
214189251Ssam/// AllocateBuckets - Allocated initialized bucket memory.
215189251Ssamstatic void **AllocateBuckets(unsigned NumBuckets) {
216189251Ssam  void **Buckets = static_cast<void**>(calloc(NumBuckets+1, sizeof(void*)));
217189251Ssam  // Set the very last bucket to be a non-null "pointer".
218189251Ssam  Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
219189251Ssam  return Buckets;
220189251Ssam}
221189251Ssam
222189251Ssam//===----------------------------------------------------------------------===//
223189251Ssam// FoldingSetImpl Implementation
224189251Ssam
225189251SsamFoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) {
226189251Ssam  assert(5 < Log2InitSize && Log2InitSize < 32 &&
227189251Ssam         "Initial hash table size out of range");
228189251Ssam  NumBuckets = 1 << Log2InitSize;
229189251Ssam  Buckets = AllocateBuckets(NumBuckets);
230189251Ssam  NumNodes = 0;
231189251Ssam}
232189251SsamFoldingSetImpl::~FoldingSetImpl() {
233189251Ssam  free(Buckets);
234189251Ssam}
235189251Ssamvoid FoldingSetImpl::clear() {
236189251Ssam  // Set all but the last bucket to null pointers.
237189251Ssam  memset(Buckets, 0, NumBuckets*sizeof(void*));
238189251Ssam
239189251Ssam  // Set the very last bucket to be a non-null "pointer".
240189251Ssam  Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
241189251Ssam
242189251Ssam  // Reset the node count to zero.
243189251Ssam  NumNodes = 0;
244189251Ssam}
245189251Ssam
246189251Ssam/// GrowHashTable - Double the size of the hash table and rehash everything.
247189251Ssam///
248189251Ssamvoid FoldingSetImpl::GrowHashTable() {
249189251Ssam  void **OldBuckets = Buckets;
250189251Ssam  unsigned OldNumBuckets = NumBuckets;
251189251Ssam  NumBuckets <<= 1;
252189251Ssam
253189251Ssam  // Clear out new buckets.
254189251Ssam  Buckets = AllocateBuckets(NumBuckets);
255189251Ssam  NumNodes = 0;
256189251Ssam
257189251Ssam  // Walk the old buckets, rehashing nodes into their new place.
258189251Ssam  FoldingSetNodeID TempID;
259189251Ssam  for (unsigned i = 0; i != OldNumBuckets; ++i) {
260189251Ssam    void *Probe = OldBuckets[i];
261189251Ssam    if (!Probe) continue;
262189251Ssam    while (Node *NodeInBucket = GetNextPtr(Probe)) {
263189251Ssam      // Figure out the next link, remove NodeInBucket from the old link.
264189251Ssam      Probe = NodeInBucket->getNextInBucket();
265189251Ssam      NodeInBucket->SetNextInBucket(0);
266189251Ssam
267189251Ssam      // Insert the node into the new bucket, after recomputing the hash.
268189251Ssam      InsertNode(NodeInBucket,
269189251Ssam                 GetBucketFor(ComputeNodeHash(NodeInBucket, TempID),
270189251Ssam                              Buckets, NumBuckets));
271189251Ssam      TempID.clear();
272189251Ssam    }
273189251Ssam  }
274189251Ssam
275189251Ssam  free(OldBuckets);
276189251Ssam}
277189251Ssam
278189251Ssam/// FindNodeOrInsertPos - Look up the node specified by ID.  If it exists,
279189251Ssam/// return it.  If not, return the insertion token that will make insertion
280189251Ssam/// faster.
281189251SsamFoldingSetImpl::Node
282189251Ssam*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
283189251Ssam                                     void *&InsertPos) {
284214734Srpaulo
285214734Srpaulo  void **Bucket = GetBucketFor(ID.ComputeHash(), Buckets, NumBuckets);
286189251Ssam  void *Probe = *Bucket;
287189251Ssam
288189251Ssam  InsertPos = 0;
289189251Ssam
290189251Ssam  FoldingSetNodeID TempID;
291189251Ssam  while (Node *NodeInBucket = GetNextPtr(Probe)) {
292189251Ssam    if (NodeEquals(NodeInBucket, ID, TempID))
293189251Ssam      return NodeInBucket;
294189251Ssam    TempID.clear();
295189251Ssam
296189251Ssam    Probe = NodeInBucket->getNextInBucket();
297189251Ssam  }
298189251Ssam
299189251Ssam  // Didn't find the node, return null with the bucket as the InsertPos.
300189251Ssam  InsertPos = Bucket;
301189251Ssam  return 0;
302189251Ssam}
303189251Ssam
304189251Ssam/// InsertNode - Insert the specified node into the folding set, knowing that it
305189251Ssam/// is not already in the map.  InsertPos must be obtained from
306189251Ssam/// FindNodeOrInsertPos.
307189251Ssamvoid FoldingSetImpl::InsertNode(Node *N, void *InsertPos) {
308189251Ssam  assert(N->getNextInBucket() == 0);
309189251Ssam  // Do we need to grow the hashtable?
310189251Ssam  if (NumNodes+1 > NumBuckets*2) {
311189251Ssam    GrowHashTable();
312189251Ssam    FoldingSetNodeID TempID;
313189251Ssam    InsertPos = GetBucketFor(ComputeNodeHash(N, TempID), Buckets, NumBuckets);
314189251Ssam  }
315189251Ssam
316189251Ssam  ++NumNodes;
317189251Ssam
318189251Ssam  /// The insert position is actually a bucket pointer.
319189251Ssam  void **Bucket = static_cast<void**>(InsertPos);
320189251Ssam
321189251Ssam  void *Next = *Bucket;
322189251Ssam
323189251Ssam  // If this is the first insertion into this bucket, its next pointer will be
324189251Ssam  // null.  Pretend as if it pointed to itself, setting the low bit to indicate
325189251Ssam  // that it is a pointer to the bucket.
326189251Ssam  if (Next == 0)
327189251Ssam    Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1);
328189251Ssam
329189251Ssam  // Set the node's next pointer, and make the bucket point to the node.
330189251Ssam  N->SetNextInBucket(Next);
331189251Ssam  *Bucket = N;
332189251Ssam}
333189251Ssam
334189251Ssam/// RemoveNode - Remove a node from the folding set, returning true if one was
335189251Ssam/// removed or false if the node was not in the folding set.
336189251Ssambool FoldingSetImpl::RemoveNode(Node *N) {
337189251Ssam  // Because each bucket is a circular list, we don't need to compute N's hash
338189251Ssam  // to remove it.
339189251Ssam  void *Ptr = N->getNextInBucket();
340189251Ssam  if (Ptr == 0) return false;  // Not in folding set.
341189251Ssam
342189251Ssam  --NumNodes;
343189251Ssam  N->SetNextInBucket(0);
344189251Ssam
345189251Ssam  // Remember what N originally pointed to, either a bucket or another node.
346189251Ssam  void *NodeNextPtr = Ptr;
347189251Ssam
348189251Ssam  // Chase around the list until we find the node (or bucket) which points to N.
349189251Ssam  while (true) {
350189251Ssam    if (Node *NodeInBucket = GetNextPtr(Ptr)) {
351189251Ssam      // Advance pointer.
352189251Ssam      Ptr = NodeInBucket->getNextInBucket();
353189251Ssam
354189251Ssam      // We found a node that points to N, change it to point to N's next node,
355189251Ssam      // removing N from the list.
356189251Ssam      if (Ptr == N) {
357189251Ssam        NodeInBucket->SetNextInBucket(NodeNextPtr);
358189251Ssam        return true;
359189251Ssam      }
360189251Ssam    } else {
361189251Ssam      void **Bucket = GetBucketPtr(Ptr);
362189251Ssam      Ptr = *Bucket;
363189251Ssam
364189251Ssam      // If we found that the bucket points to N, update the bucket to point to
365189251Ssam      // whatever is next.
366189251Ssam      if (Ptr == N) {
367189251Ssam        *Bucket = NodeNextPtr;
368189251Ssam        return true;
369189251Ssam      }
370189251Ssam    }
371189251Ssam  }
372189251Ssam}
373189251Ssam
374189251Ssam/// GetOrInsertNode - If there is an existing simple Node exactly
375189251Ssam/// equal to the specified node, return it.  Otherwise, insert 'N' and it
376189251Ssam/// instead.
377189251SsamFoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) {
378189251Ssam  FoldingSetNodeID ID;
379189251Ssam  GetNodeProfile(N, ID);
380189251Ssam  void *IP;
381189251Ssam  if (Node *E = FindNodeOrInsertPos(ID, IP))
382189251Ssam    return E;
383189251Ssam  InsertNode(N, IP);
384189251Ssam  return N;
385189251Ssam}
386189251Ssam
387189251Ssam//===----------------------------------------------------------------------===//
388189251Ssam// FoldingSetIteratorImpl Implementation
389189251Ssam
390189251SsamFoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) {
391189251Ssam  // Skip to the first non-null non-self-cycle bucket.
392189251Ssam  while (*Bucket != reinterpret_cast<void*>(-1) &&
393189251Ssam         (*Bucket == 0 || GetNextPtr(*Bucket) == 0))
394189251Ssam    ++Bucket;
395189251Ssam
396189251Ssam  NodePtr = static_cast<FoldingSetNode*>(*Bucket);
397189251Ssam}
398189251Ssam
399189251Ssamvoid FoldingSetIteratorImpl::advance() {
400189251Ssam  // If there is another link within this bucket, go to it.
401189251Ssam  void *Probe = NodePtr->getNextInBucket();
402189251Ssam
403189251Ssam  if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe))
404189251Ssam    NodePtr = NextNodeInBucket;
405189251Ssam  else {
406189251Ssam    // Otherwise, this is the last link in this bucket.
407189251Ssam    void **Bucket = GetBucketPtr(Probe);
408189251Ssam
409189251Ssam    // Skip to the next non-null non-self-cycle bucket.
410189251Ssam    do {
411189251Ssam      ++Bucket;
412189251Ssam    } while (*Bucket != reinterpret_cast<void*>(-1) &&
413189251Ssam             (*Bucket == 0 || GetNextPtr(*Bucket) == 0));
414189251Ssam
415189251Ssam    NodePtr = static_cast<FoldingSetNode*>(*Bucket);
416189251Ssam  }
417189251Ssam}
418189251Ssam
419189251Ssam//===----------------------------------------------------------------------===//
420189251Ssam// FoldingSetBucketIteratorImpl Implementation
421189251Ssam
422189251SsamFoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) {
423189251Ssam  Ptr = (*Bucket == 0 || GetNextPtr(*Bucket) == 0) ? (void*) Bucket : *Bucket;
424189251Ssam}
425189251Ssam