1193323Sed//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===//
2193323Sed//
3193323Sed//                     The LLVM Compiler Infrastructure
4193323Sed//
5193323Sed// This file is distributed under the University of Illinois Open Source
6193323Sed// License. See LICENSE.TXT for details.
7193323Sed//
8193323Sed//===----------------------------------------------------------------------===//
9193323Sed//
10193323Sed// This file implements a hash set that can be used to remove duplication of
11249423Sdim// nodes in a graph.
12193323Sed//
13193323Sed//===----------------------------------------------------------------------===//
14193323Sed
15193323Sed#include "llvm/ADT/FoldingSet.h"
16234353Sdim#include "llvm/ADT/Hashing.h"
17205407Srdivacky#include "llvm/Support/Allocator.h"
18198090Srdivacky#include "llvm/Support/ErrorHandling.h"
19249423Sdim#include "llvm/Support/Host.h"
20193323Sed#include "llvm/Support/MathExtras.h"
21193323Sed#include <cassert>
22193323Sed#include <cstring>
23193323Sedusing namespace llvm;
24193323Sed
25193323Sed//===----------------------------------------------------------------------===//
26212904Sdim// FoldingSetNodeIDRef Implementation
27212904Sdim
28212904Sdim/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
29212904Sdim/// used to lookup the node in the FoldingSetImpl.
30212904Sdimunsigned FoldingSetNodeIDRef::ComputeHash() const {
31234353Sdim  return static_cast<unsigned>(hash_combine_range(Data, Data+Size));
32212904Sdim}
33212904Sdim
34212904Sdimbool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const {
35212904Sdim  if (Size != RHS.Size) return false;
36212904Sdim  return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0;
37212904Sdim}
38212904Sdim
39243830Sdim/// Used to compare the "ordering" of two nodes as defined by the
40243830Sdim/// profiled bits and their ordering defined by memcmp().
41243830Sdimbool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const {
42243830Sdim  if (Size != RHS.Size)
43243830Sdim    return Size < RHS.Size;
44243830Sdim  return memcmp(Data, RHS.Data, Size*sizeof(*Data)) < 0;
45243830Sdim}
46243830Sdim
47212904Sdim//===----------------------------------------------------------------------===//
48193323Sed// FoldingSetNodeID Implementation
49193323Sed
50193323Sed/// Add* - Add various data types to Bit data.
51193323Sed///
52193323Sedvoid FoldingSetNodeID::AddPointer(const void *Ptr) {
53193323Sed  // Note: this adds pointers to the hash using sizes and endianness that
54193323Sed  // depend on the host.  It doesn't matter however, because hashing on
55193323Sed  // pointer values in inherently unstable.  Nothing  should depend on the
56193323Sed  // ordering of nodes in the folding set.
57226633Sdim  Bits.append(reinterpret_cast<unsigned *>(&Ptr),
58226633Sdim              reinterpret_cast<unsigned *>(&Ptr+1));
59193323Sed}
60193323Sedvoid FoldingSetNodeID::AddInteger(signed I) {
61193323Sed  Bits.push_back(I);
62193323Sed}
63193323Sedvoid FoldingSetNodeID::AddInteger(unsigned I) {
64193323Sed  Bits.push_back(I);
65193323Sed}
66193323Sedvoid FoldingSetNodeID::AddInteger(long I) {
67193323Sed  AddInteger((unsigned long)I);
68193323Sed}
69193323Sedvoid FoldingSetNodeID::AddInteger(unsigned long I) {
70193323Sed  if (sizeof(long) == sizeof(int))
71193323Sed    AddInteger(unsigned(I));
72193323Sed  else if (sizeof(long) == sizeof(long long)) {
73193323Sed    AddInteger((unsigned long long)I);
74193323Sed  } else {
75198090Srdivacky    llvm_unreachable("unexpected sizeof(long)");
76193323Sed  }
77193323Sed}
78193323Sedvoid FoldingSetNodeID::AddInteger(long long I) {
79193323Sed  AddInteger((unsigned long long)I);
80193323Sed}
81193323Sedvoid FoldingSetNodeID::AddInteger(unsigned long long I) {
82193323Sed  AddInteger(unsigned(I));
83223017Sdim  if ((uint64_t)(unsigned)I != I)
84193323Sed    Bits.push_back(unsigned(I >> 32));
85193323Sed}
86193323Sed
87198090Srdivackyvoid FoldingSetNodeID::AddString(StringRef String) {
88198090Srdivacky  unsigned Size =  String.size();
89193323Sed  Bits.push_back(Size);
90193323Sed  if (!Size) return;
91193323Sed
92193323Sed  unsigned Units = Size / 4;
93193323Sed  unsigned Pos = 0;
94198090Srdivacky  const unsigned *Base = (const unsigned*) String.data();
95193323Sed
96193323Sed  // If the string is aligned do a bulk transfer.
97193323Sed  if (!((intptr_t)Base & 3)) {
98193323Sed    Bits.append(Base, Base + Units);
99193323Sed    Pos = (Units + 1) * 4;
100193323Sed  } else {
101193323Sed    // Otherwise do it the hard way.
102218893Sdim    // To be compatible with above bulk transfer, we need to take endianness
103218893Sdim    // into account.
104251662Sdim    if (sys::IsBigEndianHost) {
105218893Sdim      for (Pos += 4; Pos <= Size; Pos += 4) {
106218893Sdim        unsigned V = ((unsigned char)String[Pos - 4] << 24) |
107218893Sdim                     ((unsigned char)String[Pos - 3] << 16) |
108218893Sdim                     ((unsigned char)String[Pos - 2] << 8) |
109218893Sdim                      (unsigned char)String[Pos - 1];
110218893Sdim        Bits.push_back(V);
111218893Sdim      }
112218893Sdim    } else {
113251662Sdim      assert(sys::IsLittleEndianHost && "Unexpected host endianness");
114218893Sdim      for (Pos += 4; Pos <= Size; Pos += 4) {
115218893Sdim        unsigned V = ((unsigned char)String[Pos - 1] << 24) |
116218893Sdim                     ((unsigned char)String[Pos - 2] << 16) |
117218893Sdim                     ((unsigned char)String[Pos - 3] << 8) |
118218893Sdim                      (unsigned char)String[Pos - 4];
119218893Sdim        Bits.push_back(V);
120218893Sdim      }
121193323Sed    }
122193323Sed  }
123193323Sed
124193323Sed  // With the leftover bits.
125193323Sed  unsigned V = 0;
126218893Sdim  // Pos will have overshot size by 4 - #bytes left over.
127218893Sdim  // No need to take endianness into account here - this is always executed.
128193323Sed  switch (Pos - Size) {
129193323Sed  case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru.
130193323Sed  case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru.
131193323Sed  case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break;
132193323Sed  default: return; // Nothing left.
133193323Sed  }
134193323Sed
135193323Sed  Bits.push_back(V);
136193323Sed}
137193323Sed
138221345Sdim// AddNodeID - Adds the Bit data of another ID to *this.
139221345Sdimvoid FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) {
140221345Sdim  Bits.append(ID.Bits.begin(), ID.Bits.end());
141221345Sdim}
142221345Sdim
143193323Sed/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
144193323Sed/// lookup the node in the FoldingSetImpl.
145193323Sedunsigned FoldingSetNodeID::ComputeHash() const {
146212904Sdim  return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
147193323Sed}
148193323Sed
149193323Sed/// operator== - Used to compare two nodes to each other.
150193323Sed///
151249423Sdimbool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS) const {
152212904Sdim  return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
153193323Sed}
154193323Sed
155212904Sdim/// operator== - Used to compare two nodes to each other.
156212904Sdim///
157212904Sdimbool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const {
158212904Sdim  return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS;
159212904Sdim}
160212904Sdim
161243830Sdim/// Used to compare the "ordering" of two nodes as defined by the
162243830Sdim/// profiled bits and their ordering defined by memcmp().
163249423Sdimbool FoldingSetNodeID::operator<(const FoldingSetNodeID &RHS) const {
164243830Sdim  return *this < FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
165243830Sdim}
166243830Sdim
167243830Sdimbool FoldingSetNodeID::operator<(FoldingSetNodeIDRef RHS) const {
168243830Sdim  return FoldingSetNodeIDRef(Bits.data(), Bits.size()) < RHS;
169243830Sdim}
170243830Sdim
171205407Srdivacky/// Intern - Copy this node's data to a memory region allocated from the
172205407Srdivacky/// given allocator and return a FoldingSetNodeIDRef describing the
173205407Srdivacky/// interned data.
174205407SrdivackyFoldingSetNodeIDRef
175205407SrdivackyFoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const {
176205407Srdivacky  unsigned *New = Allocator.Allocate<unsigned>(Bits.size());
177205407Srdivacky  std::uninitialized_copy(Bits.begin(), Bits.end(), New);
178205407Srdivacky  return FoldingSetNodeIDRef(New, Bits.size());
179205407Srdivacky}
180193323Sed
181193323Sed//===----------------------------------------------------------------------===//
182193323Sed/// Helper functions for FoldingSetImpl.
183193323Sed
184193323Sed/// GetNextPtr - In order to save space, each bucket is a
185193323Sed/// singly-linked-list. In order to make deletion more efficient, we make
186193323Sed/// the list circular, so we can delete a node without computing its hash.
187193323Sed/// The problem with this is that the start of the hash buckets are not
188193323Sed/// Nodes.  If NextInBucketPtr is a bucket pointer, this method returns null:
189193323Sed/// use GetBucketPtr when this happens.
190193323Sedstatic FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) {
191193323Sed  // The low bit is set if this is the pointer back to the bucket.
192193323Sed  if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1)
193193323Sed    return 0;
194193323Sed
195193323Sed  return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr);
196193323Sed}
197193323Sed
198193323Sed
199193323Sed/// testing.
200193323Sedstatic void **GetBucketPtr(void *NextInBucketPtr) {
201193323Sed  intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr);
202193323Sed  assert((Ptr & 1) && "Not a bucket pointer");
203193323Sed  return reinterpret_cast<void**>(Ptr & ~intptr_t(1));
204193323Sed}
205193323Sed
206193323Sed/// GetBucketFor - Hash the specified node ID and return the hash bucket for
207193323Sed/// the specified ID.
208212904Sdimstatic void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) {
209193323Sed  // NumBuckets is always a power of 2.
210212904Sdim  unsigned BucketNum = Hash & (NumBuckets-1);
211193323Sed  return Buckets + BucketNum;
212193323Sed}
213193323Sed
214210299Sed/// AllocateBuckets - Allocated initialized bucket memory.
215210299Sedstatic void **AllocateBuckets(unsigned NumBuckets) {
216210299Sed  void **Buckets = static_cast<void**>(calloc(NumBuckets+1, sizeof(void*)));
217210299Sed  // Set the very last bucket to be a non-null "pointer".
218210299Sed  Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
219210299Sed  return Buckets;
220210299Sed}
221210299Sed
222193323Sed//===----------------------------------------------------------------------===//
223193323Sed// FoldingSetImpl Implementation
224193323Sed
225193323SedFoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) {
226193323Sed  assert(5 < Log2InitSize && Log2InitSize < 32 &&
227193323Sed         "Initial hash table size out of range");
228193323Sed  NumBuckets = 1 << Log2InitSize;
229210299Sed  Buckets = AllocateBuckets(NumBuckets);
230210299Sed  NumNodes = 0;
231193323Sed}
232193323SedFoldingSetImpl::~FoldingSetImpl() {
233210299Sed  free(Buckets);
234193323Sed}
235193323Sedvoid FoldingSetImpl::clear() {
236193323Sed  // Set all but the last bucket to null pointers.
237193323Sed  memset(Buckets, 0, NumBuckets*sizeof(void*));
238193323Sed
239193323Sed  // Set the very last bucket to be a non-null "pointer".
240193323Sed  Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
241193323Sed
242193323Sed  // Reset the node count to zero.
243193323Sed  NumNodes = 0;
244193323Sed}
245193323Sed
246193323Sed/// GrowHashTable - Double the size of the hash table and rehash everything.
247193323Sed///
248193323Sedvoid FoldingSetImpl::GrowHashTable() {
249193323Sed  void **OldBuckets = Buckets;
250193323Sed  unsigned OldNumBuckets = NumBuckets;
251193323Sed  NumBuckets <<= 1;
252193323Sed
253193323Sed  // Clear out new buckets.
254210299Sed  Buckets = AllocateBuckets(NumBuckets);
255210299Sed  NumNodes = 0;
256193323Sed
257193323Sed  // Walk the old buckets, rehashing nodes into their new place.
258212904Sdim  FoldingSetNodeID TempID;
259193323Sed  for (unsigned i = 0; i != OldNumBuckets; ++i) {
260193323Sed    void *Probe = OldBuckets[i];
261193323Sed    if (!Probe) continue;
262193323Sed    while (Node *NodeInBucket = GetNextPtr(Probe)) {
263193323Sed      // Figure out the next link, remove NodeInBucket from the old link.
264193323Sed      Probe = NodeInBucket->getNextInBucket();
265193323Sed      NodeInBucket->SetNextInBucket(0);
266193323Sed
267193323Sed      // Insert the node into the new bucket, after recomputing the hash.
268212904Sdim      InsertNode(NodeInBucket,
269212904Sdim                 GetBucketFor(ComputeNodeHash(NodeInBucket, TempID),
270212904Sdim                              Buckets, NumBuckets));
271212904Sdim      TempID.clear();
272193323Sed    }
273193323Sed  }
274193323Sed
275210299Sed  free(OldBuckets);
276193323Sed}
277193323Sed
278193323Sed/// FindNodeOrInsertPos - Look up the node specified by ID.  If it exists,
279193323Sed/// return it.  If not, return the insertion token that will make insertion
280193323Sed/// faster.
281193323SedFoldingSetImpl::Node
282193323Sed*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
283193323Sed                                     void *&InsertPos) {
284234353Sdim  unsigned IDHash = ID.ComputeHash();
285234353Sdim  void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets);
286193323Sed  void *Probe = *Bucket;
287193323Sed
288193323Sed  InsertPos = 0;
289193323Sed
290212904Sdim  FoldingSetNodeID TempID;
291193323Sed  while (Node *NodeInBucket = GetNextPtr(Probe)) {
292234353Sdim    if (NodeEquals(NodeInBucket, ID, IDHash, TempID))
293193323Sed      return NodeInBucket;
294212904Sdim    TempID.clear();
295193323Sed
296193323Sed    Probe = NodeInBucket->getNextInBucket();
297193323Sed  }
298193323Sed
299193323Sed  // Didn't find the node, return null with the bucket as the InsertPos.
300193323Sed  InsertPos = Bucket;
301193323Sed  return 0;
302193323Sed}
303193323Sed
304193323Sed/// InsertNode - Insert the specified node into the folding set, knowing that it
305193323Sed/// is not already in the map.  InsertPos must be obtained from
306193323Sed/// FindNodeOrInsertPos.
307193323Sedvoid FoldingSetImpl::InsertNode(Node *N, void *InsertPos) {
308193323Sed  assert(N->getNextInBucket() == 0);
309193323Sed  // Do we need to grow the hashtable?
310193323Sed  if (NumNodes+1 > NumBuckets*2) {
311193323Sed    GrowHashTable();
312212904Sdim    FoldingSetNodeID TempID;
313212904Sdim    InsertPos = GetBucketFor(ComputeNodeHash(N, TempID), Buckets, NumBuckets);
314193323Sed  }
315193323Sed
316193323Sed  ++NumNodes;
317193323Sed
318193323Sed  /// The insert position is actually a bucket pointer.
319193323Sed  void **Bucket = static_cast<void**>(InsertPos);
320193323Sed
321193323Sed  void *Next = *Bucket;
322193323Sed
323193323Sed  // If this is the first insertion into this bucket, its next pointer will be
324193323Sed  // null.  Pretend as if it pointed to itself, setting the low bit to indicate
325193323Sed  // that it is a pointer to the bucket.
326193323Sed  if (Next == 0)
327193323Sed    Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1);
328193323Sed
329193323Sed  // Set the node's next pointer, and make the bucket point to the node.
330193323Sed  N->SetNextInBucket(Next);
331193323Sed  *Bucket = N;
332193323Sed}
333193323Sed
334193323Sed/// RemoveNode - Remove a node from the folding set, returning true if one was
335193323Sed/// removed or false if the node was not in the folding set.
336193323Sedbool FoldingSetImpl::RemoveNode(Node *N) {
337193323Sed  // Because each bucket is a circular list, we don't need to compute N's hash
338193323Sed  // to remove it.
339193323Sed  void *Ptr = N->getNextInBucket();
340193323Sed  if (Ptr == 0) return false;  // Not in folding set.
341193323Sed
342193323Sed  --NumNodes;
343193323Sed  N->SetNextInBucket(0);
344193323Sed
345193323Sed  // Remember what N originally pointed to, either a bucket or another node.
346193323Sed  void *NodeNextPtr = Ptr;
347193323Sed
348193323Sed  // Chase around the list until we find the node (or bucket) which points to N.
349193323Sed  while (true) {
350193323Sed    if (Node *NodeInBucket = GetNextPtr(Ptr)) {
351193323Sed      // Advance pointer.
352193323Sed      Ptr = NodeInBucket->getNextInBucket();
353193323Sed
354193323Sed      // We found a node that points to N, change it to point to N's next node,
355193323Sed      // removing N from the list.
356193323Sed      if (Ptr == N) {
357193323Sed        NodeInBucket->SetNextInBucket(NodeNextPtr);
358193323Sed        return true;
359193323Sed      }
360193323Sed    } else {
361193323Sed      void **Bucket = GetBucketPtr(Ptr);
362193323Sed      Ptr = *Bucket;
363193323Sed
364193323Sed      // If we found that the bucket points to N, update the bucket to point to
365193323Sed      // whatever is next.
366193323Sed      if (Ptr == N) {
367193323Sed        *Bucket = NodeNextPtr;
368193323Sed        return true;
369193323Sed      }
370193323Sed    }
371193323Sed  }
372193323Sed}
373193323Sed
374193323Sed/// GetOrInsertNode - If there is an existing simple Node exactly
375193323Sed/// equal to the specified node, return it.  Otherwise, insert 'N' and it
376193323Sed/// instead.
377193323SedFoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) {
378193323Sed  FoldingSetNodeID ID;
379212904Sdim  GetNodeProfile(N, ID);
380193323Sed  void *IP;
381193323Sed  if (Node *E = FindNodeOrInsertPos(ID, IP))
382193323Sed    return E;
383193323Sed  InsertNode(N, IP);
384193323Sed  return N;
385193323Sed}
386193323Sed
387193323Sed//===----------------------------------------------------------------------===//
388193323Sed// FoldingSetIteratorImpl Implementation
389193323Sed
390193323SedFoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) {
391193323Sed  // Skip to the first non-null non-self-cycle bucket.
392193323Sed  while (*Bucket != reinterpret_cast<void*>(-1) &&
393193323Sed         (*Bucket == 0 || GetNextPtr(*Bucket) == 0))
394193323Sed    ++Bucket;
395193323Sed
396193323Sed  NodePtr = static_cast<FoldingSetNode*>(*Bucket);
397193323Sed}
398193323Sed
399193323Sedvoid FoldingSetIteratorImpl::advance() {
400193323Sed  // If there is another link within this bucket, go to it.
401193323Sed  void *Probe = NodePtr->getNextInBucket();
402193323Sed
403193323Sed  if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe))
404193323Sed    NodePtr = NextNodeInBucket;
405193323Sed  else {
406193323Sed    // Otherwise, this is the last link in this bucket.
407193323Sed    void **Bucket = GetBucketPtr(Probe);
408193323Sed
409193323Sed    // Skip to the next non-null non-self-cycle bucket.
410193323Sed    do {
411193323Sed      ++Bucket;
412193323Sed    } while (*Bucket != reinterpret_cast<void*>(-1) &&
413193323Sed             (*Bucket == 0 || GetNextPtr(*Bucket) == 0));
414193323Sed
415193323Sed    NodePtr = static_cast<FoldingSetNode*>(*Bucket);
416193323Sed  }
417193323Sed}
418193323Sed
419193323Sed//===----------------------------------------------------------------------===//
420193323Sed// FoldingSetBucketIteratorImpl Implementation
421193323Sed
422193323SedFoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) {
423193323Sed  Ptr = (*Bucket == 0 || GetNextPtr(*Bucket) == 0) ? (void*) Bucket : *Bucket;
424193323Sed}
425